/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <nfs/nfs4_clnt.h>
#include <nfs/rnode4.h>
#include <sys/systm.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>

static void	nfs4_free_open_owner(nfs4_open_owner_t *, mntinfo4_t *);
static nfs4_open_owner_t *find_freed_open_owner(cred_t *,
			nfs4_oo_hash_bucket_t *, mntinfo4_t *);
static open_delegation_type4 get_dtype(rnode4_t *);

#ifdef DEBUG
int nfs4_client_foo_debug = 0x0;
int nfs4_client_open_dg = 0x0;
/*
 * If this is non-zero, the lockowner and openowner seqid sync primitives
 * will intermittently return errors.
 */
static int seqid_sync_faults = 0;
#endif

stateid4 clnt_special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

stateid4 clnt_special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};

/* finds hash bucket and locks it */
static nfs4_oo_hash_bucket_t *
lock_bucket(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_oo_hash_bucket_t *bucketp;
	uint32_t hash_key;

	hash_key = (uint32_t)(crgetuid(cr) + crgetruid(cr))
	    % NFS4_NUM_OO_BUCKETS;
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE, "lock_bucket: "
	    "hash_key %d for cred %p", hash_key, (void*)cr));

	ASSERT(hash_key >= 0 && hash_key < NFS4_NUM_OO_BUCKETS);
	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = &(mi->mi_oo_list[hash_key]);
	mutex_enter(&bucketp->b_lock);
	return (bucketp);
}

/* unlocks the hash bucket pointed to by bucketp */
static void
unlock_bucket(nfs4_oo_hash_bucket_t *bucketp)
{
	mutex_exit(&bucketp->b_lock);
}

/*
 * Removes the lock owner from the rnode's lock_owners list and frees the
 * corresponding reference.
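 * It is safe to call this for a lock owner that has already been removed
 * from the list; that case is detected below and the function returns
 * without touching the reference count.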
 */
void
nfs4_rnode_remove_lock_owner(rnode4_t *rp, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_rnode_remove_lock_owner"));

	mutex_enter(&rp->r_statev4_lock);

	if (lop->lo_next_rnode == NULL) {
		/* already removed from list */
		mutex_exit(&rp->r_statev4_lock);
		return;
	}

	ASSERT(lop->lo_prev_rnode != NULL);

	lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
	lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;

	lop->lo_next_rnode = lop->lo_prev_rnode = NULL;

	mutex_exit(&rp->r_statev4_lock);

	/*
	 * This would be an appropriate place for
	 * RELEASE_LOCKOWNER.  For now, this is overkill
	 * because in the common case, close is going to
	 * release any lockowners anyway.
	 */
	lock_owner_rele(lop);
}

/*
 * Remove all lock owners from the rnode's lock_owners list.  Frees up
 * their references from the list.
 */

void
nfs4_flush_lock_owners(rnode4_t *rp)
{
	nfs4_lock_owner_t *lop;

	mutex_enter(&rp->r_statev4_lock);
	while (rp->r_lo_head.lo_next_rnode != &rp->r_lo_head) {
		lop = rp->r_lo_head.lo_next_rnode;
		lop->lo_prev_rnode->lo_next_rnode = lop->lo_next_rnode;
		lop->lo_next_rnode->lo_prev_rnode = lop->lo_prev_rnode;
		lop->lo_next_rnode = lop->lo_prev_rnode = NULL;
		lock_owner_rele(lop);
	}
	mutex_exit(&rp->r_statev4_lock);
}

void
nfs4_clear_open_streams(rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

	mutex_enter(&rp->r_os_lock);
	while ((osp = list_head(&rp->r_open_streams)) != NULL) {
		open_owner_rele(osp->os_open_owner);
		list_remove(&rp->r_open_streams, osp);
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	}
	mutex_exit(&rp->r_os_lock);
}

void
open_owner_hold(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count++;
	mutex_exit(&oop->oo_lock);
}

/*
 * Frees (lazily) the open owner if the ref count hits zero.
 */
void
open_owner_rele(nfs4_open_owner_t *oop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_owner_rele"));

	mutex_enter(&oop->oo_lock);
	oop->oo_ref_count--;
	if (oop->oo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_owner_rele: freeing open owner"));
		oop->oo_valid = 0;
		mutex_exit(&oop->oo_lock);
		/*
		 * Ok, we don't destroy the open owner, nor do we put it on
		 * the mntinfo4's free list just yet.  We are lazy about it
		 * and let callers of find_open_owner() do that to keep
		 * locking simple.
		 */
	} else {
		mutex_exit(&oop->oo_lock);
	}
}

void
open_stream_hold(nfs4_open_stream_t *osp)
{
	mutex_enter(&osp->os_sync_lock);
	osp->os_ref_count++;
	mutex_exit(&osp->os_sync_lock);
}

/*
 * Frees the open stream and removes it from the rnode4's open streams list
 * if the ref count drops to zero.
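 * The caller must not hold the rnode's r_os_lock (asserted below), since
 * this routine may need to acquire it to unlink the stream from the list.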
 */
void
open_stream_rele(nfs4_open_stream_t *osp, rnode4_t *rp)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "open_stream_rele"));

	ASSERT(!mutex_owned(&rp->r_os_lock));

	mutex_enter(&osp->os_sync_lock);
	ASSERT(osp->os_ref_count > 0);
	osp->os_ref_count--;
	if (osp->os_ref_count == 0) {
		nfs4_open_owner_t *tmp_oop;

		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "open_stream_rele: freeing open stream"));
		osp->os_valid = 0;
		tmp_oop = osp->os_open_owner;
		mutex_exit(&osp->os_sync_lock);

		/* now see if we need to destroy the open owner */
		open_owner_rele(tmp_oop);

		mutex_enter(&rp->r_os_lock);
		list_remove(&rp->r_open_streams, osp);
		mutex_exit(&rp->r_os_lock);

		/* free up osp */
		mutex_destroy(&osp->os_sync_lock);
		osp->os_open_owner = NULL;
		kmem_free(osp, sizeof (*osp));
	} else {
		mutex_exit(&osp->os_sync_lock);
	}
}

void
lock_owner_hold(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count++;
	mutex_exit(&lop->lo_lock);
}

/*
 * Frees the lock owner if the ref count hits zero and
 * the structure no longer holds any locks.
 */
void
lock_owner_rele(nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "lock_owner_rele"));

	mutex_enter(&lop->lo_lock);
	lop->lo_ref_count--;
	if (lop->lo_ref_count == 0) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "lock_owner_rele: freeing lock owner: "
		    "%x", lop->lo_pid));
		lop->lo_valid = 0;
		/*
		 * If there are no references, the lock_owner should
		 * already be off the rnode's list.
		 */
		ASSERT(lop->lo_next_rnode == NULL);
		ASSERT(lop->lo_prev_rnode == NULL);
		ASSERT(!(lop->lo_flags & NFS4_LOCK_SEQID_INUSE));
		ASSERT(lop->lo_seqid_holder == NULL);
		mutex_exit(&lop->lo_lock);

		/* free up lop */
		cv_destroy(&lop->lo_cv_seqid_sync);
		mutex_destroy(&lop->lo_lock);
		kmem_free(lop, sizeof (*lop));
	} else {
		mutex_exit(&lop->lo_lock);
	}
}

/*
 * This increments the open owner ref count if found.
 * The argument 'just_created' determines whether we are looking for open
 * owners with the 'oo_just_created' flag set or not.
 */
nfs4_open_owner_t *
find_open_owner_nolock(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop = NULL, *next_oop;
	nfs4_oo_hash_bucket_t *bucketp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_owner: cred %p, just_created %d",
	    (void*)cr, just_created));

	ASSERT(mi != NULL);
	ASSERT(mutex_owned(&mi->mi_lock));

	bucketp = lock_bucket(cr, mi);

	/* got hash bucket, search through open owners */
	for (oop = list_head(&bucketp->b_oo_hash_list); oop != NULL; ) {
		mutex_enter(&oop->oo_lock);
		if (!crcmp(oop->oo_cred, cr) &&
		    (oop->oo_just_created == just_created ||
		    just_created == NFS4_JUST_CREATED)) {
			/* match */
			if (oop->oo_valid == 0) {
				/* reactivate the open owner */
				oop->oo_valid = 1;
				ASSERT(oop->oo_ref_count == 0);
			}
			oop->oo_ref_count++;
			mutex_exit(&oop->oo_lock);
			unlock_bucket(bucketp);
			return (oop);
		}
		next_oop = list_next(&bucketp->b_oo_hash_list, oop);
		if (oop->oo_valid == 0) {
			list_remove(&bucketp->b_oo_hash_list, oop);

			/*
			 * Now we go ahead and put this open owner
			 * on the freed list.  This is our lazy method.
			 */
			nfs4_free_open_owner(oop, mi);
		}

		mutex_exit(&oop->oo_lock);
		oop = next_oop;
	}

	/* search through recently freed open owners */
	oop = find_freed_open_owner(cr, bucketp, mi);

	unlock_bucket(bucketp);

	return (oop);
}

nfs4_open_owner_t *
find_open_owner(cred_t *cr, int just_created, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop;

	mutex_enter(&mi->mi_lock);
	oop = find_open_owner_nolock(cr, just_created, mi);
	mutex_exit(&mi->mi_lock);

	return (oop);
}

/*
 * This increments osp's ref count if found.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
find_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_open_stream"));

	mutex_enter(&rp->r_os_lock);
	/* Now, no one can add to or delete from rp's open streams list */
	for (osp = list_head(&rp->r_open_streams); osp != NULL;
	    osp = list_next(&rp->r_open_streams, osp)) {
		mutex_enter(&osp->os_sync_lock);
		if (osp->os_open_owner == oop && osp->os_valid != 0) {
			/* match */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "find_open_stream "
			    "got a match"));

			osp->os_ref_count++;
			mutex_exit(&rp->r_os_lock);
			return (osp);
		}
		mutex_exit(&osp->os_sync_lock);
	}

	mutex_exit(&rp->r_os_lock);
	return (NULL);
}

/*
 * Find the lock owner for the given file and process ID.  If "which" is
 * LOWN_VALID_STATEID, require that the lock owner contain a valid stateid
 * from the server.
 *
 * This increments the lock owner's ref count if found.  Returns NULL if
 * there was no match.
 */
nfs4_lock_owner_t *
find_lock_owner(rnode4_t *rp, pid_t pid, lown_which_t which)
{
	nfs4_lock_owner_t *lop, *next_lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_lock_owner: pid %x, which %d", pid, which));

	ASSERT(which == LOWN_ANY || which == LOWN_VALID_STATEID);

	/* search by pid */
	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0 &&
		    !(lop->lo_flags & NFS4_BAD_SEQID_LOCK)) {
			if (which == LOWN_ANY ||
			    lop->lo_just_created != NFS4_JUST_CREATED) {
				/* Found a matching lock owner */
				NFS4_DEBUG(nfs4_client_state_debug,
				    (CE_NOTE, "find_lock_owner: "
				    "got a match"));

				lop->lo_ref_count++;
				mutex_exit(&lop->lo_lock);
				mutex_exit(&rp->r_statev4_lock);
				return (lop);
			}
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	mutex_exit(&rp->r_statev4_lock);
	return (NULL);
}

/*
 * This returns the delegation stateid in 'sid'.  Returns 1 if a successful
 * delegation stateid was found, otherwise returns 0.
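 * A write delegation satisfies any operation, while a read delegation only
 * satisfies non-WRITE operations; a delegation with a return pending is
 * never used.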
 */

static int
nfs4_get_deleg_stateid(rnode4_t *rp, nfs_opnum4 op, stateid4 *sid)
{
	ASSERT(!mutex_owned(&rp->r_statev4_lock));

	mutex_enter(&rp->r_statev4_lock);
	if (((rp->r_deleg_type == OPEN_DELEGATE_WRITE && op == OP_WRITE) ||
	    (rp->r_deleg_type != OPEN_DELEGATE_NONE && op != OP_WRITE)) &&
	    !rp->r_deleg_return_pending) {

		*sid = rp->r_deleg_stateid;
		mutex_exit(&rp->r_statev4_lock);
		return (1);
	}
	mutex_exit(&rp->r_statev4_lock);
	return (0);
}

/*
 * This returns the lock stateid in 'sid'.  Returns 1 if a successful lock
 * stateid was found, otherwise returns 0.
 */
static int
nfs4_get_lock_stateid(rnode4_t *rp, pid_t pid, stateid4 *sid)
{
	nfs4_lock_owner_t *lop;

	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (lop) {
		/*
		 * Found a matching lock owner, so use a lock
		 * stateid rather than an open stateid.
		 */
		mutex_enter(&lop->lo_lock);
		*sid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		lock_owner_rele(lop);
		return (1);
	}

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_get_lock_stateid: no lop"));
	return (0);
}

/*
 * This returns the open stateid in 'sid'.  Returns 1 if a successful open
 * stateid was found, otherwise returns 0.
 *
 * Once the stateid is returned to the caller, it is no longer protected;
 * so the caller must be prepared to handle OLD/BAD_STATEID where
 * appropriate.
 */
static int
nfs4_get_open_stateid(rnode4_t *rp, cred_t *cr, mntinfo4_t *mi, stateid4 *sid)
{
	nfs4_open_owner_t *oop;
	nfs4_open_stream_t *osp;

	ASSERT(mi != NULL);

	oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (!oop) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no oop"));
		return (0);
	}

	osp = find_open_stream(oop, rp);
	open_owner_rele(oop);
	if (!osp) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: no osp"));
		return (0);
	}

	if (osp->os_failed_reopen) {
		NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
		    "nfs4_get_open_stateid: osp %p failed reopen",
		    (void *)osp));
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);
		return (0);
	}
	*sid = osp->open_stateid;
	mutex_exit(&osp->os_sync_lock);
	open_stream_rele(osp, rp);
	return (1);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR this 'op' is not
 * OP_WRITE and we have a delegation held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is an open stream, otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * Used for WRITE operations.
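 *
 * A minimal usage sketch (hypothetical caller; 'wargs' is illustrative,
 * not a name from this file):
 *
 *	nfs4_stateid_types_t sid_types;
 *
 *	nfs4_init_stateid_types(&sid_types);
 *	wargs->stateid = nfs4_get_w_stateid(cr, rp, curproc->p_pid, mi,
 *	    OP_WRITE, &sid_types);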
 */
stateid4
nfs4_get_w_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp)
{
	stateid4 sid;

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

/*
 * Returns the delegation stateid if this 'op' is OP_WRITE and the
 * delegation we hold is a write delegation, OR this 'op' is not
 * OP_WRITE and we have a delegation held (read or write), otherwise
 * returns the lock stateid if there is a lock owner, otherwise
 * returns the open stateid if there is an open stream, otherwise
 * returns the special stateid <seqid = 0, other = 0>.
 *
 * This also updates which stateid we are using in 'sid_tp', skips
 * previously attempted stateids, and skips checking higher priority
 * stateids than the current level as dictated by 'sid_tp->cur_sid_type'
 * for async reads.
 *
 * Used for READ and SETATTR operations.
 */
stateid4
nfs4_get_stateid(cred_t *cr, rnode4_t *rp, pid_t pid, mntinfo4_t *mi,
    nfs_opnum4 op, nfs4_stateid_types_t *sid_tp, bool_t async_read)
{
	stateid4 sid;

	/*
	 * For asynchronous READs, do not attempt to retry from the start of
	 * the stateid priority list, just continue from where you last left
	 * off.
	 */
	if (async_read) {
		switch (sid_tp->cur_sid_type) {
		case NO_SID:
			break;
		case DEL_SID:
			goto lock_stateid;
		case LOCK_SID:
			goto open_stateid;
		case OPEN_SID:
			goto special_stateid;
		case SPEC_SID:
		default:
			cmn_err(CE_PANIC, "nfs4_get_stateid: illegal current "
			    "stateid type %d", sid_tp->cur_sid_type);
		}
	}

	if (nfs4_get_deleg_stateid(rp, op, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->d_sid)) {
			sid_tp->cur_sid_type = DEL_SID;
			return (sid);
		}
	}
lock_stateid:
	if (nfs4_get_lock_stateid(rp, pid, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->l_sid)) {
			sid_tp->cur_sid_type = LOCK_SID;
			return (sid);
		}
	}
open_stateid:
	if (nfs4_get_open_stateid(rp, cr, mi, &sid)) {
		if (!stateid4_cmp(&sid, &sid_tp->o_sid)) {
			sid_tp->cur_sid_type = OPEN_SID;
			return (sid);
		}
	}
special_stateid:
	bzero(&sid, sizeof (stateid4));
	sid_tp->cur_sid_type = SPEC_SID;
	return (sid);
}

void
nfs4_set_lock_stateid(nfs4_lock_owner_t *lop, stateid4 stateid)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_stateid"));

	ASSERT(lop);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	mutex_enter(&lop->lo_lock);
	lop->lock_stateid = stateid;
	mutex_exit(&lop->lo_lock);
}

/*
 * Sequence number used when a new open owner is needed.
 * This is used so as to not confuse the server.  Since an open owner
 * is based off of a cred, a cred could be re-used quickly, and the server
 * may not release all state for a cred.
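 * The counter below is 64 bits and bumped atomically, so each new open
 * owner name is unique for the lifetime of this client instance.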
 */
static uint64_t open_owner_seq_num = 0;

uint64_t
nfs4_get_new_oo_name(void)
{
	return (atomic_add_64_nv(&open_owner_seq_num, 1));
}

/*
 * Create a new open owner and add it to the open owner hash table.
 */
nfs4_open_owner_t *
create_open_owner(cred_t *cr, mntinfo4_t *mi)
{
	nfs4_open_owner_t *oop;
	nfs4_oo_hash_bucket_t *bucketp;

	oop = kmem_alloc(sizeof (nfs4_open_owner_t), KM_SLEEP);
	/*
	 * Make sure the cred doesn't go away when we put this open owner
	 * on the free list, as well as make crcmp() a valid check.
	 */
	crhold(cr);
	oop->oo_cred = cr;
	mutex_init(&oop->oo_lock, NULL, MUTEX_DEFAULT, NULL);
	oop->oo_ref_count = 1;
	oop->oo_valid = 1;
	oop->oo_just_created = NFS4_JUST_CREATED;
	oop->oo_seqid = 0;
	oop->oo_seqid_inuse = 0;
	oop->oo_last_good_seqid = 0;
	oop->oo_last_good_op = TAG_NONE;
	oop->oo_cred_otw = NULL;
	cv_init(&oop->oo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);

	/*
	 * A Solaris open_owner is <oo_seq_num>
	 */
	oop->oo_name = nfs4_get_new_oo_name();

	/* now add the struct into the cred hash table */
	ASSERT(mutex_owned(&mi->mi_lock));
	bucketp = lock_bucket(cr, mi);
	list_insert_head(&bucketp->b_oo_hash_list, oop);
	unlock_bucket(bucketp);

	return (oop);
}

/*
 * Create a new open stream and add it to the rnode's list.
 * Increments the ref count on oop.
 * Returns with 'os_sync_lock' held.
 */
nfs4_open_stream_t *
create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp)
{
	nfs4_open_stream_t *osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = kmem_alloc(sizeof (nfs4_open_stream_t), KM_SLEEP);
	osp->os_open_ref_count = 1;
	osp->os_mapcnt = 0;
	osp->os_ref_count = 2;
	osp->os_valid = 1;
	osp->os_open_owner = oop;
	osp->os_orig_oo_name = oop->oo_name;
	bzero(&osp->open_stateid, sizeof (stateid4));
	osp->os_share_acc_read = 0;
	osp->os_share_acc_write = 0;
	osp->os_mmap_read = 0;
	osp->os_mmap_write = 0;
	osp->os_share_deny_none = 0;
	osp->os_share_deny_read = 0;
	osp->os_share_deny_write = 0;
	osp->os_delegation = 0;
	osp->os_dc_openacc = 0;
	osp->os_final_close = 0;
	osp->os_pending_close = 0;
	osp->os_failed_reopen = 0;
	osp->os_force_close = 0;
	mutex_init(&osp->os_sync_lock, NULL, MUTEX_DEFAULT, NULL);

	/* open owner gets a reference */
	open_owner_hold(oop);

	/* now add the open stream to rp */
	mutex_enter(&rp->r_os_lock);
	mutex_enter(&osp->os_sync_lock);
	list_insert_head(&rp->r_open_streams, osp);
	mutex_exit(&rp->r_os_lock);

	return (osp);
}

/*
 * Returns an open stream with 'os_sync_lock' held.
 * If the open stream is found (rather than created), its
 * 'os_open_ref_count' is bumped.
 *
 * There is no race with two threads entering this function
 * and creating two open streams for the same <oop, rp> pair.
 * This is because the open seqid sync must be acquired, thus
 * only allowing one thread in at a time.
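 * '*created_osp' is set to 1 when a new stream is created here (its
 * 'os_open_ref_count' starts at 1), and to 0 when an existing one is found.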
 */
nfs4_open_stream_t *
find_or_create_open_stream(nfs4_open_owner_t *oop, rnode4_t *rp,
    int *created_osp)
{
	nfs4_open_stream_t *osp;

#ifdef DEBUG
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	mutex_exit(&oop->oo_lock);
#endif

	osp = find_open_stream(oop, rp);
	if (!osp) {
		osp = create_open_stream(oop, rp);
		if (osp)
			*created_osp = 1;
	} else {
		*created_osp = 0;
		osp->os_open_ref_count++;
	}

	return (osp);
}

static uint64_t lock_owner_seq_num = 0;

/*
 * Create a new lock owner and add it to the rnode's list.
 * Assumes the rnode's r_statev4_lock is held.
 * The created lock owner has a reference count of 2: one for the list and
 * one for the caller to use.  Returns the lock owner locked down.
 */
nfs4_lock_owner_t *
create_lock_owner(rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t *lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "create_lock_owner: pid %x", pid));

	ASSERT(mutex_owned(&rp->r_statev4_lock));

	lop = kmem_alloc(sizeof (nfs4_lock_owner_t), KM_SLEEP);
	lop->lo_ref_count = 2;
	lop->lo_valid = 1;
	bzero(&lop->lock_stateid, sizeof (stateid4));
	lop->lo_pid = pid;
	lop->lock_seqid = 0;
	lop->lo_pending_rqsts = 0;
	lop->lo_just_created = NFS4_JUST_CREATED;
	lop->lo_flags = 0;
	lop->lo_seqid_holder = NULL;

	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	lop->lock_owner_name.ln_seq_num =
	    atomic_add_64_nv(&lock_owner_seq_num, 1);
	lop->lock_owner_name.ln_pid = pid;

	cv_init(&lop->lo_cv_seqid_sync, NULL, CV_DEFAULT, NULL);
	mutex_init(&lop->lo_lock, NULL, MUTEX_DEFAULT, NULL);

	mutex_enter(&lop->lo_lock);

	/* now add the lock owner to rp */
	lop->lo_prev_rnode = &rp->r_lo_head;
	lop->lo_next_rnode = rp->r_lo_head.lo_next_rnode;
	rp->r_lo_head.lo_next_rnode->lo_prev_rnode = lop;
	rp->r_lo_head.lo_next_rnode = lop;

	return (lop);
}

/*
 * This sets the lock seqid of a lock owner.
 */
void
nfs4_set_lock_seqid(seqid4 seqid, nfs4_lock_owner_t *lop)
{
	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_lock_seqid"));

	ASSERT(lop != NULL);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);

	lop->lock_seqid = seqid;
}

static void
nfs4_set_new_lock_owner_args(lock_owner4 *owner, pid_t pid)
{
	nfs4_lo_name_t *cast_namep;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_set_new_lock_owner_args"));

	owner->owner_len = sizeof (*cast_namep);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	/*
	 * A Solaris lock_owner is <seq_num><pid>
	 */
	cast_namep = (nfs4_lo_name_t *)owner->owner_val;
	cast_namep->ln_seq_num = atomic_add_64_nv(&lock_owner_seq_num, 1);
	cast_namep->ln_pid = pid;
}

/*
 * Fill in the lock owner args.
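 * If no lock owner with a valid stateid exists for this <rp, pid> pair, a
 * new owner name (a fresh sequence number plus the pid) is fabricated
 * instead.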
 */
void
nfs4_setlockowner_args(lock_owner4 *owner, rnode4_t *rp, pid_t pid)
{
	nfs4_lock_owner_t *lop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "nfs4_setlockowner_args"));

	/* This increments lop's ref count */
	lop = find_lock_owner(rp, pid, LOWN_VALID_STATEID);

	if (!lop)
		goto make_up_args;

	mutex_enter(&lop->lo_lock);
	owner->owner_len = sizeof (lop->lock_owner_name);
	owner->owner_val = kmem_alloc(owner->owner_len, KM_SLEEP);
	bcopy(&lop->lock_owner_name, owner->owner_val,
	    owner->owner_len);
	mutex_exit(&lop->lo_lock);
	lock_owner_rele(lop);
	return;

make_up_args:
	nfs4_set_new_lock_owner_args(owner, pid);
}

/*
 * This ends our use of the open owner's open seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the open seqid.
 */

void
nfs4_end_open_seqid_sync(nfs4_open_owner_t *oop)
{
	mutex_enter(&oop->oo_lock);
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid_inuse = 0;
	cv_broadcast(&oop->oo_cv_seqid_sync);
	mutex_exit(&oop->oo_lock);
}

/*
 * This starts our use of the open owner's open seqid by setting
 * the oo_seqid_inuse to true.  We will wait (forever) with a
 * cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_open_seqid_sync(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 5 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&oop->oo_lock);

	while (oop->oo_seqid_inuse) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_open_seqid_sync waiting on cv"));

		cv_wait(&oop->oo_cv_seqid_sync, &oop->oo_lock);
	}

	oop->oo_seqid_inuse = 1;

	mutex_exit(&oop->oo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_open_seqid_sync(oop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_open_seqid_sync: error=%d", error));

done:
	return (error);
}

#ifdef DEBUG
int bypass_otw[2];
#endif

/*
 * Checks to see if the OPEN OTW is necessary, that is, if it's already
 * been opened with the same access and deny bits we are now asking for.
 * Note, this assumes that vp is backed by an rnode4.
 */
int
nfs4_is_otw_open_necessary(nfs4_open_owner_t *oop, int flag, vnode_t *vp,
    int just_been_created, int *errorp, int acc, nfs4_recov_state_t *rsp)
{
	rnode4_t *rp;
	nfs4_open_stream_t *osp;
	open_delegation_type4 dt;

	rp = VTOR4(vp);

	/*
	 * Grab the delegation type.  This function is protected against
	 * the delegation being returned by virtue of start_op (called
	 * by nfs4open_otw) taking the r_deleg_recall_lock in read mode;
	 * delegreturn requires this lock in write mode to proceed.
	 */
	ASSERT(nfs_rw_lock_held(&rp->r_deleg_recall_lock, RW_READER));
	dt = get_dtype(rp);

	/* returns with 'os_sync_lock' held */
	osp = find_open_stream(oop, rp);

	if (osp) {
		uint32_t do_otw = 0;

		if (osp->os_failed_reopen) {
			NFS4_DEBUG(nfs4_open_stream_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: os_failed_reopen "
			    "set on osp %p, cr %p, rp %s", (void *)osp,
			    (void *)osp->os_open_owner->oo_cred,
			    rnode4info(rp)));
			do_otw = 1;
		}

		/*
		 * check access/deny bits
		 */
		if (!do_otw && (flag & FREAD))
			if (osp->os_share_acc_read == 0 &&
			    dt == OPEN_DELEGATE_NONE)
				do_otw = 1;

		if (!do_otw && (flag & FWRITE))
			if (osp->os_share_acc_write == 0 &&
			    dt != OPEN_DELEGATE_WRITE)
				do_otw = 1;

		if (!do_otw) {
			NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
			    "nfs4_is_otw_open_necessary: can skip this "
			    "open OTW"));
			if (!just_been_created) {
				osp->os_open_ref_count++;
				if (flag & FREAD)
					osp->os_share_acc_read++;
				if (flag & FWRITE)
					osp->os_share_acc_write++;
				osp->os_share_deny_none++;
			}

			/*
			 * Need to reset this bitfield for the possible case
			 * where we were going to OTW CLOSE the file, got a
			 * non-recoverable error, and before we could retry
			 * the CLOSE, OPENed the file again.
			 */
			ASSERT(osp->os_open_owner->oo_seqid_inuse);
			osp->os_final_close = 0;
			osp->os_force_close = 0;

			mutex_exit(&osp->os_sync_lock);
			open_stream_rele(osp, rp);

#ifdef DEBUG
			bypass_otw[0]++;
#endif

			*errorp = 0;
			return (0);
		}
		mutex_exit(&osp->os_sync_lock);
		open_stream_rele(osp, rp);

	} else if (dt != OPEN_DELEGATE_NONE) {
		/*
		 * Even if there isn't an open_stream yet, we may still be
		 * able to bypass the otw open if the client owns a
		 * delegation.
		 *
		 * If you are asking for WRITE, but we only have
		 * a read delegation, then you still have to go otw.
		 */

		if (flag & FWRITE && dt == OPEN_DELEGATE_READ)
			return (1);

		/*
		 * TODO - evaluate the nfsace4
		 */

		/*
		 * Check the access flags to make sure the caller
		 * had permission.
		 */
		if (flag & FREAD && !(acc & VREAD))
			return (1);

		if (flag & FWRITE && !(acc & VWRITE))
			return (1);

		/*
		 * create_open_stream will add a reference to oop,
		 * this will prevent the open_owner_rele done in
		 * nfs4open_otw from destroying the open_owner.
		 */

		/* returns with 'os_sync_lock' held */
		osp = create_open_stream(oop, rp);
		if (osp == NULL)
			return (1);

		osp->open_stateid = rp->r_deleg_stateid;
		osp->os_delegation = 1;

		if (flag & FREAD)
			osp->os_share_acc_read++;
		if (flag & FWRITE)
			osp->os_share_acc_write++;

		osp->os_share_deny_none++;
		mutex_exit(&osp->os_sync_lock);

		open_stream_rele(osp, rp);

		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		ASSERT(rsp != NULL);
		if (rsp->rs_sp != NULL) {
			mutex_enter(&rsp->rs_sp->s_lock);
			nfs4_inc_state_ref_count_nolock(rsp->rs_sp,
			    VTOMI4(vp));
			mutex_exit(&rsp->rs_sp->s_lock);
		}
#ifdef DEBUG
		bypass_otw[1]++;
#endif

		*errorp = 0;
		return (0);
	}

	return (1);
}

static open_delegation_type4
get_dtype(rnode4_t *rp)
{
	open_delegation_type4 dt;

	mutex_enter(&rp->r_statev4_lock);
	ASSERT(!rp->r_deleg_return_inprog);
	if (rp->r_deleg_return_pending)
		dt = OPEN_DELEGATE_NONE;
	else
		dt = rp->r_deleg_type;
	mutex_exit(&rp->r_statev4_lock);

	return (dt);
}

/*
 * Fill in *locker with the lock state arguments for a LOCK call.  If
 * lop->lo_just_created == NFS4_JUST_CREATED, oop and osp must be non-NULL.
 * Caller must already hold the necessary seqid sync lock(s).
 */

void
nfs4_setup_lock_args(nfs4_lock_owner_t *lop, nfs4_open_owner_t *oop,
    nfs4_open_stream_t *osp, clientid4 clientid, locker4 *locker)
{
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	if (lop->lo_just_created == NFS4_JUST_CREATED) {
		/* this is a new lock request */
		open_to_lock_owner4 *nown;

		ASSERT(oop != NULL);
		ASSERT(osp != NULL);

		locker->new_lock_owner = TRUE;
		nown = &locker->locker4_u.open_owner;
		nown->open_seqid = nfs4_get_open_seqid(oop) + 1;
		mutex_enter(&osp->os_sync_lock);
		nown->open_stateid = osp->open_stateid;
		mutex_exit(&osp->os_sync_lock);
		nown->lock_seqid = lop->lock_seqid;	/* initial, so no +1 */

		nown->lock_owner.clientid = clientid;
		nown->lock_owner.owner_len = sizeof (lop->lock_owner_name);
		nown->lock_owner.owner_val =
		    kmem_alloc(nown->lock_owner.owner_len, KM_SLEEP);
		bcopy(&lop->lock_owner_name, nown->lock_owner.owner_val,
		    nown->lock_owner.owner_len);
	} else {
		exist_lock_owner4 *eown;
		/* have an existing lock owner */

		locker->new_lock_owner = FALSE;
		eown = &locker->locker4_u.lock_owner;
		mutex_enter(&lop->lo_lock);
		eown->lock_stateid = lop->lock_stateid;
		mutex_exit(&lop->lo_lock);
		eown->lock_seqid = lop->lock_seqid + 1;
	}
}

/*
 * This starts our use of the lock owner's lock seqid by setting
 * the lo_flags to NFS4_LOCK_SEQID_INUSE.  We will wait (forever)
 * with a cv_wait() until we are woken up.
 *
 * Return values:
 * 0		no problems
 * EAGAIN	caller should retry (like a recovery retry)
 */
int
nfs4_start_lock_seqid_sync(nfs4_lock_owner_t *lop, mntinfo4_t *mi)
{
	int error = 0;
#ifdef DEBUG
	static int ops = 0;	/* fault injection */
#endif

#ifdef DEBUG
	if (seqid_sync_faults && curthread != mi->mi_recovthread &&
	    ++ops % 7 == 0)
		return (EAGAIN);
#endif

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);
	if (error != 0)
		goto done;

	mutex_enter(&lop->lo_lock);

	ASSERT(lop->lo_seqid_holder != curthread);
	while (lop->lo_flags & NFS4_LOCK_SEQID_INUSE) {
		NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
		    "nfs4_start_lock_seqid_sync: waiting on cv"));

		cv_wait(&lop->lo_cv_seqid_sync, &lop->lo_lock);
	}
	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE, "nfs4_start_lock_seqid_sync: "
	    "NFS4_LOCK_SEQID_INUSE"));

	lop->lo_flags |= NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = curthread;
	mutex_exit(&lop->lo_lock);

	mutex_enter(&mi->mi_lock);
	if ((mi->mi_flags & MI4_RECOV_ACTIV) &&
	    curthread != mi->mi_recovthread)
		error = EAGAIN;
	mutex_exit(&mi->mi_lock);

	if (error == EAGAIN)
		nfs4_end_lock_seqid_sync(lop);

	NFS4_DEBUG(nfs4_seqid_sync, (CE_NOTE,
	    "nfs4_start_lock_seqid_sync: error=%d", error));

done:
	return (error);
}

/*
 * This ends our use of the lock owner's lock seqid by setting
 * the appropriate flags and issuing a cv_signal to wake up another
 * thread waiting to use the lock seqid.
 */
void
nfs4_end_lock_seqid_sync(nfs4_lock_owner_t *lop)
{
	mutex_enter(&lop->lo_lock);
	ASSERT(lop->lo_flags & NFS4_LOCK_SEQID_INUSE);
	ASSERT(lop->lo_seqid_holder == curthread);
	lop->lo_flags &= ~NFS4_LOCK_SEQID_INUSE;
	lop->lo_seqid_holder = NULL;
	cv_broadcast(&lop->lo_cv_seqid_sync);
	mutex_exit(&lop->lo_lock);
}

/*
 * Returns a reference to a lock owner via lopp, which has its lock seqid
 * synchronization started.
 * If the lock owner is in the 'just_created' state, then we return its open
 * owner and open stream and start the open seqid synchronization.
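 * On success the caller is responsible for ending the lock seqid
 * synchronization (and the open seqid synchronization, when an open owner
 * is returned) once the request completes.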
 *
 * Return value:
 * NFS4_OK		no problems
 * NFS4ERR_DELAY	there is lost state to recover; caller should retry
 * NFS4ERR_IO		no open stream
 */
nfsstat4
nfs4_find_or_create_lock_owner(pid_t pid, rnode4_t *rp, cred_t *cr,
    nfs4_open_owner_t **oopp, nfs4_open_stream_t **ospp,
    nfs4_lock_owner_t **lopp)
{
	nfs4_lock_owner_t *lop, *next_lop;
	mntinfo4_t *mi;
	int error = 0;
	nfsstat4 stat;

	mi = VTOMI4(RTOV4(rp));

	mutex_enter(&rp->r_statev4_lock);

	lop = rp->r_lo_head.lo_next_rnode;
	while (lop != &rp->r_lo_head) {
		mutex_enter(&lop->lo_lock);
		if (lop->lo_pid == pid && lop->lo_valid != 0) {
			/* Found a matching lock owner */
			NFS4_DEBUG(nfs4_client_state_debug,
			    (CE_NOTE, "nfs4_find_or_create_lock_owner: "
			    "got a match"));
			lop->lo_ref_count++;
			break;
		}
		next_lop = lop->lo_next_rnode;
		mutex_exit(&lop->lo_lock);
		lop = next_lop;
	}

	if (lop == &rp->r_lo_head) {
		/* create temporary lock owner */
		lop = create_lock_owner(rp, pid);
	}
	mutex_exit(&rp->r_statev4_lock);

	/* Have a locked down lock owner struct now */
	if (lop->lo_just_created != NFS4_JUST_CREATED) {
		/* This is an existing lock owner */
		*oopp = NULL;
		*ospp = NULL;
	} else {
		/* Lock owner doesn't exist yet */

		/* First grab open owner seqid synchronization */
		mutex_exit(&lop->lo_lock);
		*oopp = find_open_owner(cr, NFS4_PERM_CREATED, mi);
		if (*oopp == NULL)
			goto kill_new_lop;
		error = nfs4_start_open_seqid_sync(*oopp, mi);
		if (error == EAGAIN) {
			stat = NFS4ERR_DELAY;
			goto failed;
		}
		*ospp = find_open_stream(*oopp, rp);
		if (*ospp == NULL) {
			nfs4_end_open_seqid_sync(*oopp);
			goto kill_new_lop;
		}
		if ((*ospp)->os_failed_reopen) {
			mutex_exit(&(*ospp)->os_sync_lock);
			NFS4_DEBUG((nfs4_open_stream_debug ||
			    nfs4_client_lock_debug), (CE_NOTE,
			    "nfs4_find_or_create_lock_owner: os_failed_reopen;"
			    "osp %p, cr %p, rp %s", (void *)(*ospp),
			    (void *)cr, rnode4info(rp)));
			nfs4_end_open_seqid_sync(*oopp);
			stat = NFS4ERR_IO;
			goto failed;
		}
		mutex_exit(&(*ospp)->os_sync_lock);

		/*
		 * Now see if the lock owner has become permanent while we
		 * had released our lock.
		 */
		mutex_enter(&lop->lo_lock);
		if (lop->lo_just_created != NFS4_JUST_CREATED) {
			nfs4_end_open_seqid_sync(*oopp);
			open_stream_rele(*ospp, rp);
			open_owner_rele(*oopp);
			*oopp = NULL;
			*ospp = NULL;
		}
	}
	mutex_exit(&lop->lo_lock);

	error = nfs4_start_lock_seqid_sync(lop, mi);
	if (error == EAGAIN) {
		if (*oopp != NULL)
			nfs4_end_open_seqid_sync(*oopp);
		stat = NFS4ERR_DELAY;
		goto failed;
	}
	ASSERT(error == 0);

	*lopp = lop;
	return (NFS4_OK);

kill_new_lop:
	/*
	 * A previous CLOSE was attempted but got EINTR, and the application
	 * continued to use the file descriptor (whose state is now
	 * unspecified).  But now the open stream is gone (which could also
	 * destroy the open owner), hence we can no longer continue.  The
	 * calling function should return EIO to the application.
	 */
	NFS4_DEBUG(nfs4_lost_rqst_debug || nfs4_client_lock_debug,
	    (CE_NOTE, "nfs4_find_or_create_lock_owner: destroy newly created "
	    "lop %p, oop %p, osp %p", (void *)lop, (void *)(*oopp),
	    (void *)(*ospp)));

	nfs4_rnode_remove_lock_owner(rp, lop);
	stat = NFS4ERR_IO;

failed:
	lock_owner_rele(lop);
	if (*oopp) {
		open_owner_rele(*oopp);
		*oopp = NULL;
	}
	if (*ospp) {
		open_stream_rele(*ospp, rp);
		*ospp = NULL;
	}
	return (stat);
}

/*
 * This function grabs a recently freed open owner off of the freed open
 * owner list if there is a match on the cred 'cr'.  It returns NULL if no
 * such match is found.  If a match is found, the open owner's 'oo_ref_count'
 * and 'oo_valid' are both reset to 1 (sane values).
 */
static nfs4_open_owner_t *
find_freed_open_owner(cred_t *cr, nfs4_oo_hash_bucket_t *bucketp,
    mntinfo4_t *mi)
{
	nfs4_open_owner_t *foop;

	NFS4_DEBUG(nfs4_client_state_debug, (CE_NOTE,
	    "find_freed_open_owner: cred %p", (void*)cr));

	ASSERT(mutex_owned(&mi->mi_lock));
	ASSERT(mutex_owned(&bucketp->b_lock));

	/* got hash bucket, search through freed open owners */
	for (foop = list_head(&mi->mi_foo_list); foop != NULL;
	    foop = list_next(&mi->mi_foo_list, foop)) {
		if (!crcmp(foop->oo_cred, cr)) {
			NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
			    "find_freed_open_owner: got a match open owner "
			    "%p", (void *)foop));
			foop->oo_ref_count = 1;
			foop->oo_valid = 1;
			list_remove(&mi->mi_foo_list, foop);
			mi->mi_foo_num--;

			/* now add the struct into the cred hash table */
			list_insert_head(&bucketp->b_oo_hash_list, foop);
			return (foop);
		}
	}

	return (NULL);
}

/*
 * Insert the newly freed 'oop' into the mi's freed oop list,
 * always at the head of the list.  If we've already reached
 * our maximum allowed number of freed open owners (mi_foo_max),
 * then remove the LRU open owner on the list (namely the tail).
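 * (In this file the function is only called from find_open_owner_nolock(),
 * with mi_lock and the hash bucket lock already held.)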
 */
static void
nfs4_free_open_owner(nfs4_open_owner_t *oop, mntinfo4_t *mi)
{
	nfs4_open_owner_t *lru_foop;

	if (mi->mi_foo_num < mi->mi_foo_max) {
		NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
		    "nfs4_free_open_owner: num free %d, max free %d, "
		    "insert open owner %p for mntinfo4 %p",
		    mi->mi_foo_num, mi->mi_foo_max, (void *)oop,
		    (void *)mi));
		list_insert_head(&mi->mi_foo_list, oop);
		mi->mi_foo_num++;
		return;
	}

	/* need to replace a freed open owner */

	lru_foop = list_tail(&mi->mi_foo_list);

	NFS4_DEBUG(nfs4_client_foo_debug, (CE_NOTE,
	    "nfs4_free_open_owner: destroy %p, insert %p",
	    (void *)lru_foop, (void *)oop));

	list_remove(&mi->mi_foo_list, lru_foop);
	nfs4_destroy_open_owner(lru_foop);

	/* head always has latest freed oop */
	list_insert_head(&mi->mi_foo_list, oop);
}

void
nfs4_destroy_open_owner(nfs4_open_owner_t *oop)
{
	ASSERT(oop != NULL);

	crfree(oop->oo_cred);
	if (oop->oo_cred_otw)
		crfree(oop->oo_cred_otw);
	mutex_destroy(&oop->oo_lock);
	cv_destroy(&oop->oo_cv_seqid_sync);
	kmem_free(oop, sizeof (*oop));
}

seqid4
nfs4_get_open_seqid(nfs4_open_owner_t *oop)
{
	ASSERT(oop->oo_seqid_inuse);
	return (oop->oo_seqid);
}

/*
 * This sets the open seqid for an <open owner, mntinfo4> pair.
 */
void
nfs4_set_open_seqid(seqid4 seqid, nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid = seqid;
	oop->oo_last_good_seqid = seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * This bumps the current open seqid for the open owner 'oop'.
 */
void
nfs4_get_and_set_next_open_seqid(nfs4_open_owner_t *oop,
    nfs4_tag_type_t tag_type)
{
	ASSERT(oop->oo_seqid_inuse);
	oop->oo_seqid++;
	oop->oo_last_good_seqid = oop->oo_seqid;
	oop->oo_last_good_op = tag_type;
}

/*
 * If no open owner was provided, this function takes the cred to find an
 * open owner within the given mntinfo4_t.  Either way we return the
 * open owner's OTW credential if it exists; otherwise returns the
 * supplied 'cr'.
 *
 * A hold is put on the returned credential, and it is up to the caller
 * to free the cred.
 */
cred_t *
nfs4_get_otw_cred(cred_t *cr, mntinfo4_t *mi, nfs4_open_owner_t *provided_oop)
{
	cred_t *ret_cr;
	nfs4_open_owner_t *oop = provided_oop;

	if (oop == NULL)
		oop = find_open_owner(cr, NFS4_PERM_CREATED, mi);
	if (oop != NULL) {
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (provided_oop == NULL)
			open_owner_rele(oop);
	} else {
		ret_cr = cr;
		crhold(ret_cr);
	}
	return (ret_cr);
}

/*
 * Retrieves the next open stream in the rnode's list if an open stream
 * is provided; otherwise gets the first open stream in the list.
 * The open owner for that open stream is then retrieved, and if its
 * oo_cred_otw exists then it is returned; otherwise the provided 'cr'
 * is returned.  *osp is set to the 'found' open stream.
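 * '*first_time' selects the optimized cred-based lookup on the first call
 * (and is cleared before returning); '*last_time' is set once the list has
 * been exhausted and the plain 'cr' is being returned.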
 *
 * Note: we don't set *osp to the open stream retrieved via the
 * optimized check since that won't necessarily be at the beginning
 * of the rnode list, and if that osp doesn't work we'd like to
 * check _all_ open streams (starting from the beginning of the
 * rnode list).
 */
cred_t *
nfs4_get_otw_cred_by_osp(rnode4_t *rp, cred_t *cr,
    nfs4_open_stream_t **osp, bool_t *first_time, bool_t *last_time)
{
	nfs4_open_stream_t *next_osp = NULL;
	cred_t *ret_cr;

	ASSERT(cr != NULL);
	/*
	 * As an optimization, try to find the open owner
	 * for the cred provided since that's most likely
	 * to work.
	 */
	if (*first_time) {
		nfs4_open_owner_t *oop;

		oop = find_open_owner(cr, NFS4_PERM_CREATED, VTOMI4(RTOV4(rp)));
		if (oop) {
			next_osp = find_open_stream(oop, rp);
			if (next_osp)
				mutex_exit(&next_osp->os_sync_lock);
			open_owner_rele(oop);
		} else {
			next_osp = NULL;
		}
	} else {
		int delay_rele = 0;

		/* return the next open stream for this rnode */
		mutex_enter(&rp->r_os_lock);
		/* Now, no one can add to or delete from rp's open streams */

		if (*osp) {
			next_osp = list_next(&rp->r_open_streams, *osp);
			/*
			 * Delay the rele of *osp until after we drop
			 * r_os_lock to not deadlock with oo_lock
			 * via an open_stream_rele()->open_owner_rele().
			 */
			delay_rele = 1;
		} else {
			next_osp = list_head(&rp->r_open_streams);
		}
		if (next_osp) {
			nfs4_open_stream_t *tmp_osp;

			/* find the next valid open stream */
			mutex_enter(&next_osp->os_sync_lock);
			while (next_osp && !next_osp->os_valid) {
				tmp_osp =
				    list_next(&rp->r_open_streams, next_osp);
				mutex_exit(&next_osp->os_sync_lock);
				next_osp = tmp_osp;
				if (next_osp)
					mutex_enter(&next_osp->os_sync_lock);
			}
			if (next_osp) {
				next_osp->os_ref_count++;
				mutex_exit(&next_osp->os_sync_lock);
			}
		}
		mutex_exit(&rp->r_os_lock);
		if (delay_rele)
			open_stream_rele(*osp, rp);
	}

	if (next_osp) {
		nfs4_open_owner_t *oop;

		oop = next_osp->os_open_owner;
		mutex_enter(&oop->oo_lock);
		if (oop->oo_cred_otw)
			ret_cr = oop->oo_cred_otw;
		else
			ret_cr = cr;
		crhold(ret_cr);
		mutex_exit(&oop->oo_lock);
		if (*first_time) {
			open_stream_rele(next_osp, rp);
			*osp = NULL;
		} else
			*osp = next_osp;
	} else {
		/* just return the cred provided to us */
		if (*first_time != TRUE)
			*last_time = TRUE;
		*osp = NULL;
		ret_cr = cr;
		crhold(ret_cr);
	}

	if (*first_time)
		*first_time = FALSE;
	return (ret_cr);
}

void
nfs4_init_stateid_types(nfs4_stateid_types_t *sid_tp)
{
	bzero(&sid_tp->d_sid, sizeof (stateid4));
	bzero(&sid_tp->l_sid, sizeof (stateid4));
	bzero(&sid_tp->o_sid, sizeof (stateid4));
	sid_tp->cur_sid_type = NO_SID;
}

void
nfs4_save_stateid(stateid4 *s1, nfs4_stateid_types_t *sid_tp)
{
	NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
	    "nfs4_save_stateid: saved %s stateid",
	    sid_tp->cur_sid_type == DEL_SID ? "delegation" :
	    sid_tp->cur_sid_type == LOCK_SID ? "lock" :
	    sid_tp->cur_sid_type == OPEN_SID ?
"open" : "special")); 1723 1724 switch (sid_tp->cur_sid_type) { 1725 case DEL_SID: 1726 sid_tp->d_sid = *s1; 1727 break; 1728 case LOCK_SID: 1729 sid_tp->l_sid = *s1; 1730 break; 1731 case OPEN_SID: 1732 sid_tp->o_sid = *s1; 1733 break; 1734 case SPEC_SID: 1735 default: 1736 cmn_err(CE_PANIC, "nfs4_save_stateid: illegal " 1737 "stateid type %d", sid_tp->cur_sid_type); 1738 } 1739 } 1740 1741 /* 1742 * We got NFS4ERR_BAD_SEQID. Setup some arguments to pass to recovery. 1743 * Caller is responsible for freeing. 1744 */ 1745 nfs4_bseqid_entry_t * 1746 nfs4_create_bseqid_entry(nfs4_open_owner_t *oop, nfs4_lock_owner_t *lop, 1747 vnode_t *vp, pid_t pid, nfs4_tag_type_t tag, seqid4 seqid) 1748 { 1749 nfs4_bseqid_entry_t *bsep; 1750 1751 bsep = kmem_alloc(sizeof (*bsep), KM_SLEEP); 1752 bsep->bs_oop = oop; 1753 bsep->bs_lop = lop; 1754 bsep->bs_vp = vp; 1755 bsep->bs_pid = pid; 1756 bsep->bs_tag = tag; 1757 bsep->bs_seqid = seqid; 1758 1759 return (bsep); 1760 } 1761 1762 void 1763 nfs4open_dg_save_lost_rqst(int error, nfs4_lost_rqst_t *lost_rqstp, 1764 nfs4_open_owner_t *oop, nfs4_open_stream_t *osp, cred_t *cr, 1765 vnode_t *vp, int access_close, int deny_close) 1766 { 1767 lost_rqstp->lr_putfirst = FALSE; 1768 1769 ASSERT(vp != NULL); 1770 if (error == ETIMEDOUT || error == EINTR || 1771 NFS4_FRC_UNMT_ERR(error, vp->v_vfsp)) { 1772 NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE, 1773 "nfs4open_dg_save_lost_rqst: error %d", error)); 1774 1775 lost_rqstp->lr_op = OP_OPEN_DOWNGRADE; 1776 /* 1777 * The vp is held and rele'd via the recovery code. 1778 * See nfs4_save_lost_rqst. 1779 */ 1780 lost_rqstp->lr_vp = vp; 1781 lost_rqstp->lr_dvp = NULL; 1782 lost_rqstp->lr_oop = oop; 1783 lost_rqstp->lr_osp = osp; 1784 lost_rqstp->lr_lop = NULL; 1785 lost_rqstp->lr_cr = cr; 1786 lost_rqstp->lr_flk = NULL; 1787 lost_rqstp->lr_dg_acc = access_close; 1788 lost_rqstp->lr_dg_deny = deny_close; 1789 lost_rqstp->lr_putfirst = FALSE; 1790 } else { 1791 lost_rqstp->lr_op = 0; 1792 } 1793 } 1794 1795 /* 1796 * Change the access and deny bits of an OPEN. 1797 * If recovery is needed, *recov_credpp is set to the cred used OTW, 1798 * a hold is placed on it, and *recov_seqidp is set to the seqid used OTW. 1799 */ 1800 void 1801 nfs4_open_downgrade(int access_close, int deny_close, nfs4_open_owner_t *oop, 1802 nfs4_open_stream_t *osp, vnode_t *vp, cred_t *cr, nfs4_lost_rqst_t *lrp, 1803 nfs4_error_t *ep, cred_t **recov_credpp, seqid4 *recov_seqidp) 1804 { 1805 mntinfo4_t *mi; 1806 int downgrade_acc, downgrade_deny; 1807 int new_acc, new_deny; 1808 COMPOUND4args_clnt args; 1809 COMPOUND4res_clnt res; 1810 OPEN_DOWNGRADE4res *odg_res; 1811 nfs_argop4 argop[3]; 1812 nfs_resop4 *resop; 1813 rnode4_t *rp; 1814 bool_t needrecov = FALSE; 1815 int doqueue = 1; 1816 seqid4 seqid = 0; 1817 cred_t *cred_otw; 1818 hrtime_t t; 1819 1820 ASSERT(mutex_owned(&osp->os_sync_lock)); 1821 #if DEBUG 1822 mutex_enter(&oop->oo_lock); 1823 ASSERT(oop->oo_seqid_inuse); 1824 mutex_exit(&oop->oo_lock); 1825 #endif 1826 1827 1828 if (access_close == 0 && deny_close == 0) { 1829 nfs4_error_zinit(ep); 1830 return; 1831 } 1832 1833 cred_otw = nfs4_get_otw_cred(cr, VTOMI4(vp), oop); 1834 1835 cred_retry: 1836 nfs4_error_zinit(ep); 1837 downgrade_acc = 0; 1838 downgrade_deny = 0; 1839 mi = VTOMI4(vp); 1840 rp = VTOR4(vp); 1841 1842 /* 1843 * Check to see if the open stream got closed before we go OTW, 1844 * now that we have acquired the 'os_sync_lock'. 
	 */
	if (!osp->os_valid) {
		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
		    " open stream has already been closed, return success"));
		/* error has already been set */
		goto no_args_out;
	}

	/* If the file failed recovery, just quit. */
	mutex_enter(&rp->r_statelock);
	if (rp->r_flags & R4RECOVERR) {
		mutex_exit(&rp->r_statelock);
		ep->error = EIO;
		goto no_args_out;
	}
	mutex_exit(&rp->r_statelock);

	seqid = nfs4_get_open_seqid(oop) + 1;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "access_close %d, acc_read %"PRIu64" acc_write %"PRIu64"",
	    access_close, osp->os_share_acc_read, osp->os_share_acc_write));

	/* If we're closing the last READ, need to downgrade */
	if ((access_close & FREAD) && (osp->os_share_acc_read == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_READ;

	/* if we're closing the last WRITE, need to downgrade */
	if ((access_close & FWRITE) && (osp->os_share_acc_write == 1))
		downgrade_acc |= OPEN4_SHARE_ACCESS_WRITE;

	downgrade_deny = OPEN4_SHARE_DENY_NONE;

	new_acc = 0;
	new_deny = 0;

	/* set our new access and deny share bits */
	if ((osp->os_share_acc_read > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_READ))
		new_acc |= OPEN4_SHARE_ACCESS_READ;
	if ((osp->os_share_acc_write > 0) &&
	    !(downgrade_acc & OPEN4_SHARE_ACCESS_WRITE))
		new_acc |= OPEN4_SHARE_ACCESS_WRITE;

	new_deny = OPEN4_SHARE_DENY_NONE;

	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "downgrade acc 0x%x deny 0x%x", downgrade_acc, downgrade_deny));
	NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE, "nfs4_open_downgrade:"
	    "new acc 0x%x deny 0x%x", new_acc, new_deny));

	/*
	 * Check to see if we aren't actually doing any downgrade or
	 * if this is the last 'close' but the file is still mmapped.
	 * Skip this if this is a lost request resend so we don't decrement
	 * the osp's share counts more than once.
	 */
	if (!lrp &&
	    ((downgrade_acc == 0 && downgrade_deny == 0) ||
	    (new_acc == 0 && new_deny == 0))) {
		/*
		 * No downgrade to do, but still need to
		 * update osp's os_share_* counts.
		 */
		NFS4_DEBUG(nfs4_client_open_dg, (CE_NOTE,
		    "nfs4_open_downgrade: just lower the osp's count by %s",
		    (access_close & FREAD) && (access_close & FWRITE) ?
		    "read and write" : (access_close & FREAD) ? "read" :
		    (access_close & FWRITE) ?
"write" : "bogus")); 1914 if (access_close & FREAD) 1915 osp->os_share_acc_read--; 1916 if (access_close & FWRITE) 1917 osp->os_share_acc_write--; 1918 osp->os_share_deny_none--; 1919 nfs4_error_zinit(ep); 1920 1921 goto no_args_out; 1922 } 1923 1924 if (osp->os_orig_oo_name != oop->oo_name) { 1925 ep->error = EIO; 1926 goto no_args_out; 1927 } 1928 1929 /* setup the COMPOUND args */ 1930 if (lrp) 1931 args.ctag = TAG_OPEN_DG_LOST; 1932 else 1933 args.ctag = TAG_OPEN_DG; 1934 1935 args.array_len = 3; 1936 args.array = argop; 1937 1938 /* putfh */ 1939 argop[0].argop = OP_CPUTFH; 1940 argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh; 1941 1942 argop[1].argop = OP_GETATTR; 1943 argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK; 1944 argop[1].nfs_argop4_u.opgetattr.mi = mi; 1945 1946 ASSERT(mutex_owned(&osp->os_sync_lock)); 1947 ASSERT(osp->os_delegation == FALSE); 1948 1949 /* open downgrade */ 1950 argop[2].argop = OP_OPEN_DOWNGRADE; 1951 argop[2].nfs_argop4_u.opopen_downgrade.open_stateid = osp->open_stateid; 1952 argop[2].nfs_argop4_u.opopen_downgrade.share_access = new_acc; 1953 argop[2].nfs_argop4_u.opopen_downgrade.share_deny = new_deny; 1954 argop[2].nfs_argop4_u.opopen_downgrade.seqid = seqid; 1955 1956 t = gethrtime(); 1957 1958 rfs4call(mi, &args, &res, cred_otw, &doqueue, 0, ep); 1959 1960 if (ep->error == 0 && nfs4_need_to_bump_seqid(&res)) 1961 nfs4_set_open_seqid(seqid, oop, args.ctag); 1962 1963 if ((ep->error == EACCES || 1964 (ep->error == 0 && res.status == NFS4ERR_ACCESS)) && 1965 cred_otw != cr) { 1966 crfree(cred_otw); 1967 cred_otw = cr; 1968 crhold(cred_otw); 1969 if (!ep->error) 1970 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 1971 goto cred_retry; 1972 } 1973 1974 needrecov = nfs4_needs_recovery(ep, TRUE, mi->mi_vfsp); 1975 1976 if (needrecov && recov_credpp) { 1977 *recov_credpp = cred_otw; 1978 crhold(*recov_credpp); 1979 if (recov_seqidp) 1980 *recov_seqidp = seqid; 1981 } 1982 1983 if (!ep->error && !res.status) { 1984 /* get the open downgrade results */ 1985 resop = &res.array[2]; 1986 odg_res = &resop->nfs_resop4_u.opopen_downgrade; 1987 1988 osp->open_stateid = odg_res->open_stateid; 1989 1990 /* set the open streams new access/deny bits */ 1991 if (access_close & FREAD) 1992 osp->os_share_acc_read--; 1993 if (access_close & FWRITE) 1994 osp->os_share_acc_write--; 1995 osp->os_share_deny_none--; 1996 osp->os_dc_openacc = new_acc; 1997 1998 nfs4_attr_cache(vp, 1999 &res.array[1].nfs_resop4_u.opgetattr.ga_res, 2000 t, cred_otw, TRUE, NULL); 2001 } 2002 2003 if (!ep->error) 2004 (void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res); 2005 2006 no_args_out: 2007 crfree(cred_otw); 2008 } 2009 2010 /* 2011 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out 2012 * because the filesystem was forcibly unmounted) then we don't know if we 2013 * potentially left state dangling on the server, therefore the recovery 2014 * framework makes this call to resend the OPEN request and then undo it. 

/*
 * If an OPEN request gets ETIMEDOUT or EINTR (that includes bailing out
 * because the filesystem was forcibly unmounted), we don't know whether we
 * left state dangling on the server, so the recovery framework makes this
 * call to resend the OPEN request and then undo it.
 */
void
nfs4_resend_open_otw(vnode_t **vpp, nfs4_lost_rqst_t *resend_rqstp,
    nfs4_error_t *ep)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	nfs_argop4 argop[4];
	GETFH4res *gf_res = NULL;
	OPEN4cargs *open_args;
	OPEN4res *op_res;
	char *destcfp;
	int destclen;
	nfs4_ga_res_t *garp;
	vnode_t *dvp = NULL, *vp = NULL;
	rnode4_t *rp = NULL, *drp = NULL;
	cred_t *cr = NULL;
	seqid4 seqid;
	nfs4_open_owner_t *oop = NULL;
	nfs4_open_stream_t *osp = NULL;
	component4 *srcfp;
	open_claim_type4 claim;
	mntinfo4_t *mi;
	int doqueue = 1;
	bool_t retry_open = FALSE;
	int created_osp = 0;
	hrtime_t t;
	char *failed_msg = "";
	int fh_different;
	int reopen = 0;

	nfs4_error_zinit(ep);

	cr = resend_rqstp->lr_cr;
	dvp = resend_rqstp->lr_dvp;

	vp = *vpp;
	if (vp) {
		ASSERT(nfs4_consistent_type(vp));
		rp = VTOR4(vp);
	}

	if (rp) {
		/* If the file failed recovery, just quit. */
		mutex_enter(&rp->r_statelock);
		if (rp->r_flags & R4RECOVERR) {
			mutex_exit(&rp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&rp->r_statelock);
	}

	if (dvp) {
		drp = VTOR4(dvp);
		/* If the parent directory failed recovery, just quit. */
		mutex_enter(&drp->r_statelock);
		if (drp->r_flags & R4RECOVERR) {
			mutex_exit(&drp->r_statelock);
			ep->error = EIO;
			return;
		}
		mutex_exit(&drp->r_statelock);
	} else
		reopen = 1;	/* NULL dvp means this is a reopen */

	claim = resend_rqstp->lr_oclaim;
	ASSERT(claim == CLAIM_NULL || claim == CLAIM_DELEGATE_CUR);

	args.ctag = TAG_OPEN_LOST;
	args.array_len = 4;
	args.array = argop;

	argop[0].argop = OP_CPUTFH;
	if (reopen) {
		ASSERT(vp != NULL);

		mi = VTOMI4(vp);
		/*
		 * If this is a file mount, use the mntinfo parentfh.
		 */
		argop[0].nfs_argop4_u.opcputfh.sfh =
		    (vp->v_flag & VROOT) ? mi->mi_srvparentfh :
		    VTOSV(vp)->sv_dfh;
		args.ctag = TAG_REOPEN_LOST;
	} else {
		argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(dvp)->r_fh;
		mi = VTOMI4(dvp);
	}
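
	/*
	 * The resend compound is four ops: CPUTFH (the directory fh, or
	 * the server parent fh/saved directory fh for a reopen), COPEN,
	 * GETFH and GETATTR.  The GETFH and GETATTR results are used below
	 * to build or validate the vnode for the reopened file.
	 */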

	argop[1].argop = OP_COPEN;
	open_args = &argop[1].nfs_argop4_u.opcopen;
	open_args->claim = claim;

	/*
	 * If we sent over an OPEN with CREATE then the only
	 * thing we care about is not to leave dangling state
	 * on the server, not whether the file we potentially
	 * created remains on the server.  So even though the
	 * lost open request specified a CREATE, we only wish
	 * to do a non-CREATE OPEN.
	 */
	open_args->opentype = OPEN4_NOCREATE;

	srcfp = &resend_rqstp->lr_ofile;
	destclen = srcfp->utf8string_len;
	destcfp = kmem_alloc(destclen + 1, KM_SLEEP);
	bcopy(srcfp->utf8string_val, destcfp, destclen);
	destcfp[destclen] = '\0';
	if (claim == CLAIM_DELEGATE_CUR) {
		open_args->open_claim4_u.delegate_cur_info.delegate_stateid =
		    resend_rqstp->lr_ostateid;
		open_args->open_claim4_u.delegate_cur_info.cfile = destcfp;
	} else {
		open_args->open_claim4_u.cfile = destcfp;
	}

	open_args->share_access = resend_rqstp->lr_oacc;
	open_args->share_deny = resend_rqstp->lr_odeny;
	oop = resend_rqstp->lr_oop;
	ASSERT(oop != NULL);

	open_args->owner.clientid = mi2clientid(mi);
	/* this length never changes */
	open_args->owner.owner_len = sizeof (oop->oo_name);
	open_args->owner.owner_val =
	    kmem_alloc(open_args->owner.owner_len, KM_SLEEP);

	ep->error = nfs4_start_open_seqid_sync(oop, mi);
	ASSERT(ep->error == 0);	/* recov thread always succeeds */
	/*
	 * We can get away with not saving the seqid when the lost request
	 * is detected; just use the open owner's current seqid, since we
	 * only allow one op OTW per seqid and lost requests are saved FIFO.
	 */
	seqid = nfs4_get_open_seqid(oop) + 1;
	open_args->seqid = seqid;

	bcopy(&oop->oo_name, open_args->owner.owner_val,
	    open_args->owner.owner_len);

	/* getfh */
	argop[2].argop = OP_GETFH;

	/* Construct the getattr part of the compound */
	argop[3].argop = OP_GETATTR;
	argop[3].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	argop[3].nfs_argop4_u.opgetattr.mi = mi;

	res.array = NULL;

	t = gethrtime();

	rfs4call(mi, &args, &res, cr, &doqueue, 0, ep);

	if (ep->error == 0 && nfs4_need_to_bump_seqid(&res))
		nfs4_set_open_seqid(seqid, oop, args.ctag);

	NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
	    "nfs4_resend_open_otw: error %d stat %d", ep->error, res.status));

	if (ep->error || res.status)
		goto err_out;

	op_res = &res.array[1].nfs_resop4_u.opopen;
	gf_res = &res.array[2].nfs_resop4_u.opgetfh;
	garp = &res.array[3].nfs_resop4_u.opgetattr.ga_res;

	if (!vp) {
		int rnode_err = 0;
		nfs4_sharedfh_t *sfh;

		/*
		 * Even if we can't decode all the attributes (in which case
		 * they are not usable), just make the vnode.
		 */

		sfh = sfh4_get(&gf_res->object, VTOMI4(dvp));
		*vpp = makenfs4node(sfh, garp, dvp->v_vfsp, t, cr, dvp,
		    fn_get(VTOSV(dvp)->sv_name,
		    open_args->open_claim4_u.cfile));
		sfh4_rele(&sfh);
		NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
		    "nfs4_resend_open_otw: made vp %p for file %s",
		    (void *)(*vpp), open_args->open_claim4_u.cfile));

		if (ep->error)
			PURGE_ATTRCACHE4(*vpp);

		/*
		 * For the newly created *vpp case, make sure the rnode
		 * isn't bad before using it.
		 */
		mutex_enter(&(VTOR4(*vpp))->r_statelock);
		if (VTOR4(*vpp)->r_flags & R4RECOVERR)
			rnode_err = EIO;
		mutex_exit(&(VTOR4(*vpp))->r_statelock);

		if (rnode_err) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: rp %p is bad",
			    (void *)VTOR4(*vpp)));
			ep->error = rnode_err;
			goto err_out;
		}

		vp = *vpp;
		rp = VTOR4(vp);
	}
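
	/*
	 * Summary of the reopen check below: if the filehandle returned by
	 * GETFH differs from the one cached in the rnode, a persistent (or
	 * no-expire-on-open volatile) handle means we reopened a different
	 * object and recovery fails; with ordinary volatile handles we fall
	 * back to comparing fids (va_nodeid) and, if they match or fids are
	 * unavailable, we simply update the rnode's filehandle.
	 */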

	if (reopen) {
		/*
		 * Check if the path we reopened really is the same
		 * file.  We could end up in a situation where the file
		 * was removed and a new file created with the same name.
		 */
		(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
		fh_different =
		    (nfs4cmpfh(&rp->r_fh->sfh_fh, &gf_res->object) != 0);
		if (fh_different) {
			if (mi->mi_fh_expire_type == FH4_PERSISTENT ||
			    mi->mi_fh_expire_type & FH4_NOEXPIRE_WITH_OPEN) {
				/* Oops, we don't have the same file */
				if (mi->mi_fh_expire_type == FH4_PERSISTENT)
					failed_msg =
					    "Couldn't reopen: Persistent "
					    "file handle changed";
				else
					failed_msg =
					    "Couldn't reopen: Volatile "
					    "(no expire on open) file handle "
					    "changed";

				nfs4_end_open_seqid_sync(oop);
				kmem_free(destcfp, destclen + 1);
				nfs4args_copen_free(open_args);
				(void) xdr_free(xdr_COMPOUND4res_clnt,
				    (caddr_t)&res);
				nfs_rw_exit(&mi->mi_fh_lock);
				nfs4_fail_recov(vp, failed_msg, ep->error,
				    ep->stat);
				return;
			} else {
				/*
				 * We have volatile file handles that don't
				 * compare.  If the fids are the same then we
				 * assume that the file handle expired but the
				 * rnode still refers to the same file object.
				 *
				 * First check whether we have fids at all.
				 * If we don't, we have a dumb server, so we
				 * will just assume everything is ok for now.
				 */
				if (!ep->error &&
				    garp->n4g_va.va_mask & AT_NODEID &&
				    rp->r_attr.va_mask & AT_NODEID &&
				    rp->r_attr.va_nodeid !=
				    garp->n4g_va.va_nodeid) {
					/*
					 * We have fids, but they don't
					 * compare.  So kill the file.
					 */
					failed_msg =
					    "Couldn't reopen: file handle "
					    "changed due to mismatched fids";
					nfs4_end_open_seqid_sync(oop);
					kmem_free(destcfp, destclen + 1);
					nfs4args_copen_free(open_args);
					(void) xdr_free(xdr_COMPOUND4res_clnt,
					    (caddr_t)&res);
					nfs_rw_exit(&mi->mi_fh_lock);
					nfs4_fail_recov(vp, failed_msg,
					    ep->error, ep->stat);
					return;
				} else {
					/*
					 * We have volatile file handles that
					 * refer to the same file (at least
					 * they have the same fid) or we don't
					 * have fids so we can't tell. :( We'll
					 * be a kind and accepting client so
					 * we'll update the rnode's file
					 * handle with the otw handle.
					 *
					 * We need to drop mi->mi_fh_lock since
					 * sfh4_update acquires it.  Since there
					 * is only one recovery thread there is
					 * no race.
					 */
					nfs_rw_exit(&mi->mi_fh_lock);
					sfh4_update(rp->r_fh, &gf_res->object);
				}
			}
		} else {
			nfs_rw_exit(&mi->mi_fh_lock);
		}
	}

	ASSERT(nfs4_consistent_type(vp));

	if (op_res->rflags & OPEN4_RESULT_CONFIRM)
		nfs4open_confirm(vp, &seqid, &op_res->stateid, cr, TRUE,
		    &retry_open, oop, TRUE, ep, NULL);
	if (ep->error || ep->stat) {
		nfs4_end_open_seqid_sync(oop);
		kmem_free(destcfp, destclen + 1);
		nfs4args_copen_free(open_args);
		if (!ep->error)
			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}
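
	/*
	 * Note on the confirm step above: when the server sets
	 * OPEN4_RESULT_CONFIRM, the new stateid is not usable until
	 * nfs4open_confirm() has sent OPEN_CONFIRM for it, which is why a
	 * confirm failure tears down the request state and returns before
	 * any open stream is touched.
	 */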

	if (reopen) {
		/*
		 * Doing a reopen here so the osp should already exist.
		 * If not, something changed or went very wrong.
		 *
		 * returns with 'os_sync_lock' held
		 */
		osp = find_open_stream(oop, rp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't find osp"));
			ep->error = EINVAL;
			goto err_out;
		}
		osp->os_open_ref_count++;
	} else {
		mutex_enter(&oop->oo_lock);
		oop->oo_just_created = NFS4_PERM_CREATED;
		mutex_exit(&oop->oo_lock);

		/* returns with 'os_sync_lock' held */
		osp = find_or_create_open_stream(oop, rp, &created_osp);
		if (!osp) {
			NFS4_DEBUG(nfs4_lost_rqst_debug, (CE_NOTE,
			    "nfs4_resend_open_otw: couldn't create osp"));
			ep->error = EINVAL;
			goto err_out;
		}
	}

	osp->open_stateid = op_res->stateid;
	osp->os_delegation = FALSE;
	/*
	 * Need to reset this bitfield for the possible case where we were
	 * going to OTW CLOSE the file, got a non-recoverable error, and before
	 * we could retry the CLOSE, OPENed the file again.
	 */
	ASSERT(osp->os_open_owner->oo_seqid_inuse);
	osp->os_final_close = 0;
	osp->os_force_close = 0;

	if (!reopen) {
		if (open_args->share_access & OPEN4_SHARE_ACCESS_READ)
			osp->os_share_acc_read++;
		if (open_args->share_access & OPEN4_SHARE_ACCESS_WRITE)
			osp->os_share_acc_write++;
		osp->os_share_deny_none++;
	}

	mutex_exit(&osp->os_sync_lock);
	if (created_osp)
		nfs4_inc_state_ref_count(mi);
	open_stream_rele(osp, rp);

	nfs4_end_open_seqid_sync(oop);

	/* accept delegation, if any */
	nfs4_delegation_accept(rp, claim, op_res, garp, cr);

	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);

	if (claim == CLAIM_DELEGATE_CUR)
		nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
	else
		PURGE_ATTRCACHE4(vp);

	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);

	ASSERT(nfs4_consistent_type(vp));

	return;

err_out:
	nfs4_end_open_seqid_sync(oop);
	kmem_free(destcfp, destclen + 1);
	nfs4args_copen_free(open_args);
	if (!ep->error)
		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}
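
/*
 * For reference, the pieces of the lost request that the resend path above
 * consumes from nfs4_lost_rqst_t: lr_cr (credential), lr_dvp (parent
 * directory, NULL for a reopen), lr_oop (open owner), lr_oclaim (CLAIM_NULL
 * or CLAIM_DELEGATE_CUR), lr_ofile (component name), lr_ostateid (delegation
 * stateid for CLAIM_DELEGATE_CUR), and lr_oacc/lr_odeny (share bits).
 */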