1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/systm.h> 29 #include <sys/kmem.h> 30 #include <sys/cmn_err.h> 31 #include <sys/atomic.h> 32 #include <sys/clconf.h> 33 #include <sys/cladm.h> 34 #include <sys/flock.h> 35 #include <nfs/export.h> 36 #include <nfs/nfs.h> 37 #include <nfs/nfs4.h> 38 #include <nfs/nfssys.h> 39 #include <nfs/lm.h> 40 #include <sys/pathname.h> 41 #include <sys/sdt.h> 42 #include <sys/nvpair.h> 43 44 extern u_longlong_t nfs4_srv_caller_id; 45 46 extern time_t rfs4_start_time; 47 extern uint_t nfs4_srv_vkey; 48 49 stateid4 special0 = { 50 0, 51 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } 52 }; 53 54 stateid4 special1 = { 55 0xffffffff, 56 { 57 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 58 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 59 (char)0xff, (char)0xff, (char)0xff, (char)0xff 60 } 61 }; 62 63 64 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \ 65 stateid4_cmp(id, &special1)) 66 67 /* For embedding the cluster nodeid into our clientid */ 68 #define CLUSTER_NODEID_SHIFT 24 69 #define CLUSTER_MAX_NODEID 255 70 71 #ifdef DEBUG 72 int rfs4_debug; 73 #endif 74 75 static uint32_t rfs4_database_debug = 0x00; 76 77 static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf); 78 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf); 79 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip); 80 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip); 81 82 /* 83 * Couple of simple init/destroy functions for a general waiter 84 */ 85 void 86 rfs4_sw_init(rfs4_state_wait_t *swp) 87 { 88 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL); 89 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL); 90 swp->sw_active = FALSE; 91 swp->sw_wait_count = 0; 92 } 93 94 void 95 rfs4_sw_destroy(rfs4_state_wait_t *swp) 96 { 97 mutex_destroy(swp->sw_cv_lock); 98 cv_destroy(swp->sw_cv); 99 } 100 101 void 102 rfs4_sw_enter(rfs4_state_wait_t *swp) 103 { 104 mutex_enter(swp->sw_cv_lock); 105 while (swp->sw_active) { 106 swp->sw_wait_count++; 107 cv_wait(swp->sw_cv, swp->sw_cv_lock); 108 swp->sw_wait_count--; 109 } 110 ASSERT(swp->sw_active == FALSE); 111 swp->sw_active = TRUE; 112 mutex_exit(swp->sw_cv_lock); 113 } 114 115 void 116 rfs4_sw_exit(rfs4_state_wait_t *swp) 117 { 118 mutex_enter(swp->sw_cv_lock); 119 ASSERT(swp->sw_active == TRUE); 120 swp->sw_active = FALSE; 121 if (swp->sw_wait_count != 0) 122 cv_broadcast(swp->sw_cv); 123 mutex_exit(swp->sw_cv_lock); 124 } 125 126 /* 127 * CPR callback id -- not related to v4 callbacks 128 */ 129 static callb_id_t cpr_id = 0; 130 131 static void 132 deep_lock_copy(LOCK4res *dres, LOCK4res *sres) 133 { 134 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner; 135 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner; 136 137 if (sres->status == NFS4ERR_DENIED) { 138 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP); 139 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len); 140 } 141 } 142 143 static void 144 deep_lock_free(LOCK4res *res) 145 { 146 lock_owner4 *lo = &res->LOCK4res_u.denied.owner; 147 148 if (res->status == NFS4ERR_DENIED) 149 kmem_free(lo->owner_val, lo->owner_len); 150 } 151 152 static void 153 deep_open_copy(OPEN4res *dres, OPEN4res *sres) 154 { 155 nfsace4 *sacep, *dacep; 156 157 if (sres->status != NFS4_OK) { 158 return; 159 } 160 161 dres->attrset = sres->attrset; 162 163 switch (sres->delegation.delegation_type) { 164 case OPEN_DELEGATE_NONE: 165 return; 166 case OPEN_DELEGATE_READ: 167 sacep = &sres->delegation.open_delegation4_u.read.permissions; 168 dacep = &dres->delegation.open_delegation4_u.read.permissions; 169 break; 170 case OPEN_DELEGATE_WRITE: 171 sacep = &sres->delegation.open_delegation4_u.write.permissions; 172 dacep = &dres->delegation.open_delegation4_u.write.permissions; 173 break; 174 } 175 dacep->who.utf8string_val = 176 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP); 177 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val, 178 sacep->who.utf8string_len); 179 } 180 181 static void 182 deep_open_free(OPEN4res *res) 183 { 184 nfsace4 *acep; 185 if (res->status != NFS4_OK) 186 return; 187 188 switch (res->delegation.delegation_type) { 189 case OPEN_DELEGATE_NONE: 190 return; 191 case OPEN_DELEGATE_READ: 192 acep = &res->delegation.open_delegation4_u.read.permissions; 193 break; 194 case OPEN_DELEGATE_WRITE: 195 acep = &res->delegation.open_delegation4_u.write.permissions; 196 break; 197 } 198 199 if (acep->who.utf8string_val) { 200 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len); 201 acep->who.utf8string_val = NULL; 202 } 203 } 204 205 void 206 rfs4_free_reply(nfs_resop4 *rp) 207 { 208 switch (rp->resop) { 209 case OP_LOCK: 210 deep_lock_free(&rp->nfs_resop4_u.oplock); 211 break; 212 case OP_OPEN: 213 deep_open_free(&rp->nfs_resop4_u.opopen); 214 default: 215 break; 216 } 217 } 218 219 void 220 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src) 221 { 222 *dst = *src; 223 224 /* Handle responses that need deep copy */ 225 switch (src->resop) { 226 case OP_LOCK: 227 deep_lock_copy(&dst->nfs_resop4_u.oplock, 228 &src->nfs_resop4_u.oplock); 229 break; 230 case OP_OPEN: 231 deep_open_copy(&dst->nfs_resop4_u.opopen, 232 &src->nfs_resop4_u.opopen); 233 break; 234 default: 235 break; 236 }; 237 } 238 239 /* 240 * This is the implementation of the underlying state engine. The 241 * public interface to this engine is described by 242 * nfs4_state.h. Callers to the engine should hold no state engine 243 * locks when they call in to it. If the protocol needs to lock data 244 * structures it should do so after acquiring all references to them 245 * first and then follow the following lock order: 246 * 247 * client > openowner > state > lo_state > lockowner > file. 248 * 249 * Internally we only allow a thread to hold one hash bucket lock at a 250 * time and the lock is higher in the lock order (must be acquired 251 * first) than the data structure that is on that hash list. 252 * 253 * If a new reference was acquired by the caller, that reference needs 254 * to be released after releasing all acquired locks with the 255 * corresponding rfs4_*_rele routine. 256 */ 257 258 /* 259 * This code is some what prototypical for now. Its purpose currently is to 260 * implement the interfaces sufficiently to finish the higher protocol 261 * elements. This will be replaced by a dynamically resizeable tables 262 * backed by kmem_cache allocator. However synchronization is handled 263 * correctly (I hope) and will not change by much. The mutexes for 264 * the hash buckets that can be used to create new instances of data 265 * structures might be good candidates to evolve into reader writer 266 * locks. If it has to do a creation, it would be holding the 267 * mutex across a kmem_alloc with KM_SLEEP specified. 268 */ 269 270 #ifdef DEBUG 271 #define TABSIZE 17 272 #else 273 #define TABSIZE 2047 274 #endif 275 276 #define ADDRHASH(key) ((unsigned long)(key) >> 3) 277 278 /* Used to serialize create/destroy of rfs4_server_state database */ 279 kmutex_t rfs4_state_lock; 280 static rfs4_database_t *rfs4_server_state = NULL; 281 282 /* Used to serialize lookups of clientids */ 283 static krwlock_t rfs4_findclient_lock; 284 285 /* 286 * For now this "table" is exposed so that the CPR callback 287 * function can tromp through it.. 288 */ 289 rfs4_table_t *rfs4_client_tab; 290 291 static rfs4_index_t *rfs4_clientid_idx; 292 static rfs4_index_t *rfs4_nfsclnt_idx; 293 static rfs4_table_t *rfs4_openowner_tab; 294 static rfs4_index_t *rfs4_openowner_idx; 295 static rfs4_table_t *rfs4_state_tab; 296 static rfs4_index_t *rfs4_state_idx; 297 static rfs4_index_t *rfs4_state_owner_file_idx; 298 static rfs4_index_t *rfs4_state_file_idx; 299 static rfs4_table_t *rfs4_lo_state_tab; 300 static rfs4_index_t *rfs4_lo_state_idx; 301 static rfs4_index_t *rfs4_lo_state_owner_idx; 302 static rfs4_table_t *rfs4_lockowner_tab; 303 static rfs4_index_t *rfs4_lockowner_idx; 304 static rfs4_index_t *rfs4_lockowner_pid_idx; 305 static rfs4_table_t *rfs4_file_tab; 306 static rfs4_index_t *rfs4_file_idx; 307 static rfs4_table_t *rfs4_deleg_state_tab; 308 static rfs4_index_t *rfs4_deleg_idx; 309 static rfs4_index_t *rfs4_deleg_state_idx; 310 311 #define MAXTABSZ 1024*1024 312 313 /* The values below are rfs4_lease_time units */ 314 315 #ifdef DEBUG 316 #define CLIENT_CACHE_TIME 1 317 #define OPENOWNER_CACHE_TIME 1 318 #define STATE_CACHE_TIME 1 319 #define LO_STATE_CACHE_TIME 1 320 #define LOCKOWNER_CACHE_TIME 1 321 #define FILE_CACHE_TIME 3 322 #define DELEG_STATE_CACHE_TIME 1 323 #else 324 #define CLIENT_CACHE_TIME 10 325 #define OPENOWNER_CACHE_TIME 5 326 #define STATE_CACHE_TIME 1 327 #define LO_STATE_CACHE_TIME 1 328 #define LOCKOWNER_CACHE_TIME 3 329 #define FILE_CACHE_TIME 40 330 #define DELEG_STATE_CACHE_TIME 1 331 #endif 332 333 334 static time_t rfs4_client_cache_time = 0; 335 static time_t rfs4_openowner_cache_time = 0; 336 static time_t rfs4_state_cache_time = 0; 337 static time_t rfs4_lo_state_cache_time = 0; 338 static time_t rfs4_lockowner_cache_time = 0; 339 static time_t rfs4_file_cache_time = 0; 340 static time_t rfs4_deleg_state_cache_time = 0; 341 342 static bool_t rfs4_client_create(rfs4_entry_t, void *); 343 static void rfs4_dss_remove_cpleaf(rfs4_client_t *); 344 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *); 345 static void rfs4_client_destroy(rfs4_entry_t); 346 static bool_t rfs4_client_expiry(rfs4_entry_t); 347 static uint32_t clientid_hash(void *); 348 static bool_t clientid_compare(rfs4_entry_t, void *); 349 static void *clientid_mkkey(rfs4_entry_t); 350 static uint32_t nfsclnt_hash(void *); 351 static bool_t nfsclnt_compare(rfs4_entry_t, void *); 352 static void *nfsclnt_mkkey(rfs4_entry_t); 353 static bool_t rfs4_openowner_create(rfs4_entry_t, void *); 354 static void rfs4_openowner_destroy(rfs4_entry_t); 355 static bool_t rfs4_openowner_expiry(rfs4_entry_t); 356 static uint32_t openowner_hash(void *); 357 static bool_t openowner_compare(rfs4_entry_t, void *); 358 static void *openowner_mkkey(rfs4_entry_t); 359 static bool_t rfs4_state_create(rfs4_entry_t, void *); 360 static void rfs4_state_destroy(rfs4_entry_t); 361 static bool_t rfs4_state_expiry(rfs4_entry_t); 362 static uint32_t state_hash(void *); 363 static bool_t state_compare(rfs4_entry_t, void *); 364 static void *state_mkkey(rfs4_entry_t); 365 static uint32_t state_owner_file_hash(void *); 366 static bool_t state_owner_file_compare(rfs4_entry_t, void *); 367 static void *state_owner_file_mkkey(rfs4_entry_t); 368 static uint32_t state_file_hash(void *); 369 static bool_t state_file_compare(rfs4_entry_t, void *); 370 static void *state_file_mkkey(rfs4_entry_t); 371 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *); 372 static void rfs4_lo_state_destroy(rfs4_entry_t); 373 static bool_t rfs4_lo_state_expiry(rfs4_entry_t); 374 static uint32_t lo_state_hash(void *); 375 static bool_t lo_state_compare(rfs4_entry_t, void *); 376 static void *lo_state_mkkey(rfs4_entry_t); 377 static uint32_t lo_state_lo_hash(void *); 378 static bool_t lo_state_lo_compare(rfs4_entry_t, void *); 379 static void *lo_state_lo_mkkey(rfs4_entry_t); 380 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *); 381 static void rfs4_lockowner_destroy(rfs4_entry_t); 382 static bool_t rfs4_lockowner_expiry(rfs4_entry_t); 383 static uint32_t lockowner_hash(void *); 384 static bool_t lockowner_compare(rfs4_entry_t, void *); 385 static void *lockowner_mkkey(rfs4_entry_t); 386 static uint32_t pid_hash(void *); 387 static bool_t pid_compare(rfs4_entry_t, void *); 388 static void *pid_mkkey(rfs4_entry_t); 389 static bool_t rfs4_file_create(rfs4_entry_t, void *); 390 static void rfs4_file_destroy(rfs4_entry_t); 391 static uint32_t file_hash(void *); 392 static bool_t file_compare(rfs4_entry_t, void *); 393 static void *file_mkkey(rfs4_entry_t); 394 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *); 395 static void rfs4_deleg_state_destroy(rfs4_entry_t); 396 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t); 397 static uint32_t deleg_hash(void *); 398 static bool_t deleg_compare(rfs4_entry_t, void *); 399 static void *deleg_mkkey(rfs4_entry_t); 400 static uint32_t deleg_state_hash(void *); 401 static bool_t deleg_state_compare(rfs4_entry_t, void *); 402 static void *deleg_state_mkkey(rfs4_entry_t); 403 404 static void rfs4_state_rele_nounlock(rfs4_state_t *); 405 406 static int rfs4_ss_enabled = 0; 407 408 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *); 409 410 void 411 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn) 412 { 413 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t)); 414 } 415 416 static rfs4_ss_pn_t * 417 rfs4_ss_pnalloc(char *dir, char *leaf) 418 { 419 rfs4_ss_pn_t *ss_pn; 420 int dir_len, leaf_len; 421 422 /* 423 * validate we have a resonable path 424 * (account for the '/' and trailing null) 425 */ 426 if ((dir_len = strlen(dir)) > MAXPATHLEN || 427 (leaf_len = strlen(leaf)) > MAXNAMELEN || 428 (dir_len + leaf_len + 2) > MAXPATHLEN) { 429 return (NULL); 430 } 431 432 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP); 433 434 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf); 435 /* Handy pointer to just the leaf name */ 436 ss_pn->leaf = ss_pn->pn + dir_len + 1; 437 return (ss_pn); 438 } 439 440 441 /* 442 * Move the "leaf" filename from "sdir" directory 443 * to the "ddir" directory. Return the pathname of 444 * the destination unless the rename fails in which 445 * case we need to return the source pathname. 446 */ 447 static rfs4_ss_pn_t * 448 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf) 449 { 450 rfs4_ss_pn_t *src, *dst; 451 452 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL) 453 return (NULL); 454 455 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) { 456 rfs4_ss_pnfree(src); 457 return (NULL); 458 } 459 460 /* 461 * If the rename fails we shall return the src 462 * pathname and free the dst. Otherwise we need 463 * to free the src and return the dst pathanme. 464 */ 465 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) { 466 rfs4_ss_pnfree(dst); 467 return (src); 468 } 469 rfs4_ss_pnfree(src); 470 return (dst); 471 } 472 473 474 static rfs4_oldstate_t * 475 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn) 476 { 477 struct uio uio; 478 struct iovec iov[3]; 479 480 rfs4_oldstate_t *cl_ss = NULL; 481 vnode_t *vp; 482 vattr_t va; 483 uint_t id_len; 484 int err, kill_file, file_vers; 485 486 if (ss_pn == NULL) 487 return (NULL); 488 489 /* 490 * open the state file. 491 */ 492 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) { 493 return (NULL); 494 } 495 496 if (vp->v_type != VREG) { 497 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 498 VN_RELE(vp); 499 return (NULL); 500 } 501 502 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL); 503 if (err) { 504 /* 505 * We don't have read access? better get the heck out. 506 */ 507 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 508 VN_RELE(vp); 509 return (NULL); 510 } 511 512 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 513 /* 514 * get the file size to do some basic validation 515 */ 516 va.va_mask = AT_SIZE; 517 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL); 518 519 kill_file = (va.va_size == 0 || va.va_size < 520 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1)); 521 522 if (err || kill_file) { 523 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 524 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 525 VN_RELE(vp); 526 if (kill_file) { 527 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0); 528 } 529 return (NULL); 530 } 531 532 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 533 534 /* 535 * build iovecs to read in the file_version, verifier and id_len 536 */ 537 iov[0].iov_base = (caddr_t)&file_vers; 538 iov[0].iov_len = sizeof (int); 539 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier; 540 iov[1].iov_len = NFS4_VERIFIER_SIZE; 541 iov[2].iov_base = (caddr_t)&id_len; 542 iov[2].iov_len = sizeof (uint_t); 543 544 uio.uio_iov = iov; 545 uio.uio_iovcnt = 3; 546 uio.uio_segflg = UIO_SYSSPACE; 547 uio.uio_loffset = 0; 548 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t); 549 550 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) { 551 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 552 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 553 VN_RELE(vp); 554 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 555 return (NULL); 556 } 557 558 /* 559 * if the file_version doesn't match or if the 560 * id_len is zero or the combination of the verifier, 561 * id_len and id_val is bigger than the file we have 562 * a problem. If so ditch the file. 563 */ 564 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 || 565 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size); 566 567 if (err || kill_file) { 568 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 569 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 570 VN_RELE(vp); 571 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 572 if (kill_file) { 573 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0); 574 } 575 return (NULL); 576 } 577 578 /* 579 * now get the client id value 580 */ 581 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP); 582 iov[0].iov_base = cl_ss->cl_id4.id_val; 583 iov[0].iov_len = id_len; 584 585 uio.uio_iov = iov; 586 uio.uio_iovcnt = 1; 587 uio.uio_segflg = UIO_SYSSPACE; 588 uio.uio_resid = cl_ss->cl_id4.id_len = id_len; 589 590 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) { 591 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 592 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 593 VN_RELE(vp); 594 kmem_free(cl_ss->cl_id4.id_val, id_len); 595 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 596 return (NULL); 597 } 598 599 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 600 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 601 VN_RELE(vp); 602 return (cl_ss); 603 } 604 605 #ifdef nextdp 606 #undef nextdp 607 #endif 608 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 609 610 /* 611 * Add entries from statedir to supplied oldstate list. 612 * Optionally, move all entries from statedir -> destdir. 613 */ 614 void 615 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir) 616 { 617 rfs4_ss_pn_t *ss_pn; 618 rfs4_oldstate_t *cl_ss = NULL; 619 char *dirt = NULL; 620 int err, dir_eof = 0, size = 0; 621 vnode_t *dvp; 622 struct iovec iov; 623 struct uio uio; 624 struct dirent64 *dep; 625 offset_t dirchunk_offset = 0; 626 627 /* 628 * open the state directory 629 */ 630 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0)) 631 return; 632 633 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL)) 634 goto out; 635 636 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP); 637 638 /* 639 * Get and process the directory entries 640 */ 641 while (!dir_eof) { 642 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 643 iov.iov_base = dirt; 644 iov.iov_len = RFS4_SS_DIRSIZE; 645 uio.uio_iov = &iov; 646 uio.uio_iovcnt = 1; 647 uio.uio_segflg = UIO_SYSSPACE; 648 uio.uio_loffset = dirchunk_offset; 649 uio.uio_resid = RFS4_SS_DIRSIZE; 650 651 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0); 652 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 653 if (err) 654 goto out; 655 656 size = RFS4_SS_DIRSIZE - uio.uio_resid; 657 658 /* 659 * Process all the directory entries in this 660 * readdir chunk 661 */ 662 for (dep = (struct dirent64 *)dirt; size > 0; 663 dep = nextdp(dep)) { 664 665 size -= dep->d_reclen; 666 dirchunk_offset = dep->d_off; 667 668 /* 669 * Skip '.' and '..' 670 */ 671 if (NFS_IS_DOTNAME(dep->d_name)) 672 continue; 673 674 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name); 675 if (ss_pn == NULL) 676 continue; 677 678 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) { 679 if (destdir != NULL) { 680 rfs4_ss_pnfree(ss_pn); 681 cl_ss->ss_pn = rfs4_ss_movestate( 682 statedir, destdir, dep->d_name); 683 } else { 684 cl_ss->ss_pn = ss_pn; 685 } 686 insque(cl_ss, oldstate); 687 } else { 688 rfs4_ss_pnfree(ss_pn); 689 } 690 } 691 } 692 693 out: 694 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL); 695 VN_RELE(dvp); 696 if (dirt) 697 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE); 698 } 699 700 static void 701 rfs4_ss_init(void) 702 { 703 int npaths = 1; 704 char *default_dss_path = NFS4_DSS_VAR_DIR; 705 706 /* read the default stable storage state */ 707 rfs4_dss_readstate(npaths, &default_dss_path); 708 709 rfs4_ss_enabled = 1; 710 } 711 712 static void 713 rfs4_ss_fini(void) 714 { 715 rfs4_servinst_t *sip; 716 717 mutex_enter(&rfs4_servinst_lock); 718 sip = rfs4_cur_servinst; 719 while (sip != NULL) { 720 rfs4_dss_clear_oldstate(sip); 721 sip = sip->next; 722 } 723 mutex_exit(&rfs4_servinst_lock); 724 } 725 726 /* 727 * Remove all oldstate files referenced by this servinst. 728 */ 729 static void 730 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip) 731 { 732 rfs4_oldstate_t *os_head, *osp; 733 734 rw_enter(&sip->oldstate_lock, RW_WRITER); 735 os_head = sip->oldstate; 736 737 if (os_head == NULL) 738 return; 739 740 /* skip dummy entry */ 741 osp = os_head->next; 742 while (osp != os_head) { 743 char *leaf = osp->ss_pn->leaf; 744 rfs4_oldstate_t *os_next; 745 746 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf); 747 748 if (osp->cl_id4.id_val) 749 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len); 750 if (osp->ss_pn) 751 kmem_free(osp->ss_pn, sizeof (rfs4_ss_pn_t)); 752 753 os_next = osp->next; 754 remque(osp); 755 kmem_free(osp, sizeof (rfs4_oldstate_t)); 756 osp = os_next; 757 } 758 759 /* free dummy entry */ 760 kmem_free(osp, sizeof (rfs4_oldstate_t)); 761 762 sip->oldstate = NULL; 763 764 rw_exit(&sip->oldstate_lock); 765 } 766 767 /* 768 * Form the state and oldstate paths, and read in the stable storage files. 769 */ 770 void 771 rfs4_dss_readstate(int npaths, char **paths) 772 { 773 int i; 774 char *state, *oldstate; 775 776 state = kmem_alloc(MAXPATHLEN, KM_SLEEP); 777 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP); 778 779 for (i = 0; i < npaths; i++) { 780 char *path = paths[i]; 781 782 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF); 783 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF); 784 785 /* 786 * Populate the current server instance's oldstate list. 787 * 788 * 1. Read stable storage data from old state directory, 789 * leaving its contents alone. 790 * 791 * 2. Read stable storage data from state directory, 792 * and move the latter's contents to old state 793 * directory. 794 */ 795 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL); 796 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate); 797 } 798 799 kmem_free(state, MAXPATHLEN); 800 kmem_free(oldstate, MAXPATHLEN); 801 } 802 803 804 /* 805 * Check if we are still in grace and if the client can be 806 * granted permission to perform reclaims. 807 */ 808 void 809 rfs4_ss_chkclid(rfs4_client_t *cp) 810 { 811 rfs4_servinst_t *sip; 812 813 /* 814 * It should be sufficient to check the oldstate data for just 815 * this client's instance. However, since our per-instance 816 * client grouping is solely temporal, HA-NFSv4 RG failover 817 * might result in clients of the same RG being partitioned into 818 * separate instances. 819 * 820 * Until the client grouping is improved, we must check the 821 * oldstate data for all instances with an active grace period. 822 * 823 * This also serves as the mechanism to remove stale oldstate data. 824 * The first time we check an instance after its grace period has 825 * expired, the oldstate data should be cleared. 826 * 827 * Start at the current instance, and walk the list backwards 828 * to the first. 829 */ 830 mutex_enter(&rfs4_servinst_lock); 831 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { 832 rfs4_ss_chkclid_sip(cp, sip); 833 834 /* if the above check found this client, we're done */ 835 if (cp->can_reclaim) 836 break; 837 } 838 mutex_exit(&rfs4_servinst_lock); 839 } 840 841 static void 842 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip) 843 { 844 rfs4_oldstate_t *osp, *os_head; 845 846 /* short circuit everything if this server instance has no oldstate */ 847 rw_enter(&sip->oldstate_lock, RW_READER); 848 os_head = sip->oldstate; 849 rw_exit(&sip->oldstate_lock); 850 if (os_head == NULL) 851 return; 852 853 /* 854 * If this server instance is no longer in a grace period then 855 * the client won't be able to reclaim. No further need for this 856 * instance's oldstate data, so it can be cleared. 857 */ 858 if (!rfs4_servinst_in_grace(sip)) 859 return; 860 861 /* this instance is still in grace; search for the clientid */ 862 863 rw_enter(&sip->oldstate_lock, RW_READER); 864 865 os_head = sip->oldstate; 866 /* skip dummy entry */ 867 osp = os_head->next; 868 while (osp != os_head) { 869 if (osp->cl_id4.id_len == cp->nfs_client.id_len) { 870 if (bcmp(osp->cl_id4.id_val, cp->nfs_client.id_val, 871 osp->cl_id4.id_len) == 0) { 872 cp->can_reclaim = 1; 873 break; 874 } 875 } 876 osp = osp->next; 877 } 878 879 rw_exit(&sip->oldstate_lock); 880 } 881 882 /* 883 * Place client information into stable storage: 1/3. 884 * First, generate the leaf filename, from the client's IP address and 885 * the server-generated short-hand clientid. 886 */ 887 void 888 rfs4_ss_clid(rfs4_client_t *cp, struct svc_req *req) 889 { 890 const char *kinet_ntop6(uchar_t *, char *, size_t); 891 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN]; 892 struct sockaddr *ca; 893 uchar_t *b; 894 895 if (rfs4_ss_enabled == 0) { 896 return; 897 } 898 899 buf[0] = 0; 900 901 902 ca = (struct sockaddr *)svc_getrpccaller(req->rq_xprt)->buf; 903 if (ca == NULL) { 904 return; 905 } 906 907 /* 908 * Convert the caller's IP address to a dotted string 909 */ 910 if (ca->sa_family == AF_INET) { 911 912 bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->cl_addr, 913 sizeof (struct sockaddr_in)); 914 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr; 915 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF, 916 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF); 917 } else if (ca->sa_family == AF_INET6) { 918 struct sockaddr_in6 *sin6; 919 920 sin6 = (struct sockaddr_in6 *)ca; 921 bcopy(svc_getrpccaller(req->rq_xprt)->buf, &cp->cl_addr, 922 sizeof (struct sockaddr_in6)); 923 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, 924 buf, INET6_ADDRSTRLEN); 925 } 926 927 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf, 928 (longlong_t)cp->clientid); 929 rfs4_ss_clid_write(cp, leaf); 930 } 931 932 /* 933 * Place client information into stable storage: 2/3. 934 * DSS: distributed stable storage: the file may need to be written to 935 * multiple directories. 936 */ 937 static void 938 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf) 939 { 940 rfs4_servinst_t *sip; 941 942 /* 943 * It should be sufficient to write the leaf file to (all) DSS paths 944 * associated with just this client's instance. However, since our 945 * per-instance client grouping is solely temporal, HA-NFSv4 RG 946 * failover might result in us losing DSS data. 947 * 948 * Until the client grouping is improved, we must write the DSS data 949 * to all instances' paths. Start at the current instance, and 950 * walk the list backwards to the first. 951 */ 952 mutex_enter(&rfs4_servinst_lock); 953 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) { 954 int i, npaths = sip->dss_npaths; 955 956 /* write the leaf file to all DSS paths */ 957 for (i = 0; i < npaths; i++) { 958 rfs4_dss_path_t *dss_path = sip->dss_paths[i]; 959 960 /* HA-NFSv4 path might have been failed-away from us */ 961 if (dss_path == NULL) 962 continue; 963 964 rfs4_ss_clid_write_one(cp, dss_path->path, leaf); 965 } 966 } 967 mutex_exit(&rfs4_servinst_lock); 968 } 969 970 /* 971 * Place client information into stable storage: 3/3. 972 * Write the stable storage data to the requested file. 973 */ 974 static void 975 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf) 976 { 977 int ioflag; 978 int file_vers = NFS4_SS_VERSION; 979 size_t dirlen; 980 struct uio uio; 981 struct iovec iov[4]; 982 char *dir; 983 rfs4_ss_pn_t *ss_pn; 984 vnode_t *vp; 985 nfs_client_id4 *cl_id4 = &(cp->nfs_client); 986 987 /* allow 2 extra bytes for '/' & NUL */ 988 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2; 989 dir = kmem_alloc(dirlen, KM_SLEEP); 990 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF); 991 992 ss_pn = rfs4_ss_pnalloc(dir, leaf); 993 /* rfs4_ss_pnalloc takes its own copy */ 994 kmem_free(dir, dirlen); 995 if (ss_pn == NULL) 996 return; 997 998 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp, 999 CRCREAT, 0)) { 1000 rfs4_ss_pnfree(ss_pn); 1001 return; 1002 } 1003 1004 /* 1005 * We need to record leaf - i.e. the filename - so that we know 1006 * what to remove, in the future. However, the dir part of cp->ss_pn 1007 * should never be referenced directly, since it's potentially only 1008 * one of several paths with this leaf in it. 1009 */ 1010 if (cp->ss_pn != NULL) { 1011 if (strcmp(cp->ss_pn->leaf, leaf) == 0) { 1012 /* we've already recorded *this* leaf */ 1013 rfs4_ss_pnfree(ss_pn); 1014 } else { 1015 /* replace with this leaf */ 1016 rfs4_ss_pnfree(cp->ss_pn); 1017 cp->ss_pn = ss_pn; 1018 } 1019 } else { 1020 cp->ss_pn = ss_pn; 1021 } 1022 1023 /* 1024 * Build a scatter list that points to the nfs_client_id4 1025 */ 1026 iov[0].iov_base = (caddr_t)&file_vers; 1027 iov[0].iov_len = sizeof (int); 1028 iov[1].iov_base = (caddr_t)&(cl_id4->verifier); 1029 iov[1].iov_len = NFS4_VERIFIER_SIZE; 1030 iov[2].iov_base = (caddr_t)&(cl_id4->id_len); 1031 iov[2].iov_len = sizeof (uint_t); 1032 iov[3].iov_base = (caddr_t)cl_id4->id_val; 1033 iov[3].iov_len = cl_id4->id_len; 1034 1035 uio.uio_iov = iov; 1036 uio.uio_iovcnt = 4; 1037 uio.uio_loffset = 0; 1038 uio.uio_segflg = UIO_SYSSPACE; 1039 uio.uio_llimit = (rlim64_t)MAXOFFSET_T; 1040 uio.uio_resid = cl_id4->id_len + sizeof (int) + 1041 NFS4_VERIFIER_SIZE + sizeof (uint_t); 1042 1043 ioflag = uio.uio_fmode = (FWRITE|FSYNC); 1044 uio.uio_extflg = UIO_COPY_DEFAULT; 1045 1046 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 1047 /* write the full client id to the file. */ 1048 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL); 1049 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 1050 1051 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL); 1052 VN_RELE(vp); 1053 } 1054 1055 /* 1056 * DSS: distributed stable storage. 1057 * Unpack the list of paths passed by nfsd. 1058 * Use nvlist_alloc(9F) to manage the data. 1059 * The caller is responsible for allocating and freeing the buffer. 1060 */ 1061 int 1062 rfs4_dss_setpaths(char *buf, size_t buflen) 1063 { 1064 int error; 1065 1066 /* 1067 * If this is a "warm start", i.e. we previously had DSS paths, 1068 * preserve the old paths. 1069 */ 1070 if (rfs4_dss_paths != NULL) { 1071 /* 1072 * Before we lose the ptr, destroy the nvlist and pathnames 1073 * array from the warm start before this one. 1074 */ 1075 if (rfs4_dss_oldpaths) 1076 nvlist_free(rfs4_dss_oldpaths); 1077 rfs4_dss_oldpaths = rfs4_dss_paths; 1078 } 1079 1080 /* unpack the buffer into a searchable nvlist */ 1081 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP); 1082 if (error) 1083 return (error); 1084 1085 /* 1086 * Search the nvlist for the pathnames nvpair (which is the only nvpair 1087 * in the list, and record its location. 1088 */ 1089 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME, 1090 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths); 1091 return (error); 1092 } 1093 1094 /* 1095 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here 1096 * to find and mark the client for forced expire. 1097 */ 1098 static void 1099 rfs4_client_scrub(rfs4_entry_t ent, void *arg) 1100 { 1101 rfs4_client_t *cp = (rfs4_client_t *)ent; 1102 struct nfs4clrst_args *clr = arg; 1103 struct sockaddr_in6 *ent_sin6; 1104 struct in6_addr clr_in6; 1105 struct sockaddr_in *ent_sin; 1106 struct in_addr clr_in; 1107 1108 if (clr->addr_type != cp->cl_addr.ss_family) { 1109 return; 1110 } 1111 1112 switch (clr->addr_type) { 1113 1114 case AF_INET6: 1115 /* copyin the address from user space */ 1116 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) { 1117 break; 1118 } 1119 1120 ent_sin6 = (struct sockaddr_in6 *)&cp->cl_addr; 1121 1122 /* 1123 * now compare, and if equivalent mark entry 1124 * for forced expiration 1125 */ 1126 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) { 1127 cp->forced_expire = 1; 1128 } 1129 break; 1130 1131 case AF_INET: 1132 /* copyin the address from user space */ 1133 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) { 1134 break; 1135 } 1136 1137 ent_sin = (struct sockaddr_in *)&cp->cl_addr; 1138 1139 /* 1140 * now compare, and if equivalent mark entry 1141 * for forced expiration 1142 */ 1143 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) { 1144 cp->forced_expire = 1; 1145 } 1146 break; 1147 1148 default: 1149 /* force this assert to fail */ 1150 ASSERT(clr->addr_type != clr->addr_type); 1151 } 1152 } 1153 1154 /* 1155 * This is called from nfssys() in order to clear server state 1156 * for the specified client IP Address. 1157 */ 1158 void 1159 rfs4_clear_client_state(struct nfs4clrst_args *clr) 1160 { 1161 (void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr); 1162 } 1163 1164 /* 1165 * Used to initialize the NFSv4 server's state or database. All of 1166 * the tables are created and timers are set. Only called when NFSv4 1167 * service is provided. 1168 */ 1169 void 1170 rfs4_state_init() 1171 { 1172 int start_grace; 1173 extern boolean_t rfs4_cpr_callb(void *, int); 1174 char *dss_path = NFS4_DSS_VAR_DIR; 1175 1176 mutex_enter(&rfs4_state_lock); 1177 1178 /* 1179 * If the server state database has already been initialized, 1180 * skip it 1181 */ 1182 if (rfs4_server_state != NULL) { 1183 mutex_exit(&rfs4_state_lock); 1184 return; 1185 } 1186 1187 rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL); 1188 1189 /* 1190 * Set the boot time. If the server 1191 * has been restarted quickly and has had the opportunity to 1192 * service clients, then the start_time needs to be bumped 1193 * regardless. A small window but it exists... 1194 */ 1195 if (rfs4_start_time != gethrestime_sec()) 1196 rfs4_start_time = gethrestime_sec(); 1197 else 1198 rfs4_start_time++; 1199 1200 /* DSS: distributed stable storage: initialise served paths list */ 1201 rfs4_dss_pathlist = NULL; 1202 1203 /* 1204 * Create the first server instance, or a new one if the server has 1205 * been restarted; see above comments on rfs4_start_time. Don't 1206 * start its grace period; that will be done later, to maximise the 1207 * clients' recovery window. 1208 */ 1209 start_grace = 0; 1210 rfs4_servinst_create(start_grace, 1, &dss_path); 1211 1212 /* reset the "first NFSv4 request" status */ 1213 rfs4_seen_first_compound = 0; 1214 1215 /* 1216 * Add a CPR callback so that we can update client 1217 * access times to extend the lease after a suspend 1218 * and resume (using the same class as rpcmod/connmgr) 1219 */ 1220 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); 1221 1222 /* set the various cache timers for table creation */ 1223 if (rfs4_client_cache_time == 0) 1224 rfs4_client_cache_time = CLIENT_CACHE_TIME; 1225 if (rfs4_openowner_cache_time == 0) 1226 rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME; 1227 if (rfs4_state_cache_time == 0) 1228 rfs4_state_cache_time = STATE_CACHE_TIME; 1229 if (rfs4_lo_state_cache_time == 0) 1230 rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME; 1231 if (rfs4_lockowner_cache_time == 0) 1232 rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME; 1233 if (rfs4_file_cache_time == 0) 1234 rfs4_file_cache_time = FILE_CACHE_TIME; 1235 if (rfs4_deleg_state_cache_time == 0) 1236 rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME; 1237 1238 /* Create the overall database to hold all server state */ 1239 rfs4_server_state = rfs4_database_create(rfs4_database_debug); 1240 1241 /* Now create the individual tables */ 1242 rfs4_client_cache_time *= rfs4_lease_time; 1243 rfs4_client_tab = rfs4_table_create(rfs4_server_state, 1244 "Client", 1245 rfs4_client_cache_time, 1246 2, 1247 rfs4_client_create, 1248 rfs4_client_destroy, 1249 rfs4_client_expiry, 1250 sizeof (rfs4_client_t), 1251 TABSIZE, 1252 MAXTABSZ/8, 100); 1253 rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab, 1254 "nfs_client_id4", nfsclnt_hash, 1255 nfsclnt_compare, nfsclnt_mkkey, 1256 TRUE); 1257 rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab, 1258 "client_id", clientid_hash, 1259 clientid_compare, clientid_mkkey, 1260 FALSE); 1261 1262 rfs4_openowner_cache_time *= rfs4_lease_time; 1263 rfs4_openowner_tab = rfs4_table_create(rfs4_server_state, 1264 "OpenOwner", 1265 rfs4_openowner_cache_time, 1266 1, 1267 rfs4_openowner_create, 1268 rfs4_openowner_destroy, 1269 rfs4_openowner_expiry, 1270 sizeof (rfs4_openowner_t), 1271 TABSIZE, 1272 MAXTABSZ, 100); 1273 rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab, 1274 "open_owner4", openowner_hash, 1275 openowner_compare, 1276 openowner_mkkey, TRUE); 1277 1278 rfs4_state_cache_time *= rfs4_lease_time; 1279 rfs4_state_tab = rfs4_table_create(rfs4_server_state, 1280 "OpenStateID", 1281 rfs4_state_cache_time, 1282 3, 1283 rfs4_state_create, 1284 rfs4_state_destroy, 1285 rfs4_state_expiry, 1286 sizeof (rfs4_state_t), 1287 TABSIZE, 1288 MAXTABSZ, 100); 1289 1290 rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab, 1291 "Openowner-File", 1292 state_owner_file_hash, 1293 state_owner_file_compare, 1294 state_owner_file_mkkey, TRUE); 1295 1296 rfs4_state_idx = rfs4_index_create(rfs4_state_tab, 1297 "State-id", state_hash, 1298 state_compare, state_mkkey, FALSE); 1299 1300 rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab, 1301 "File", state_file_hash, 1302 state_file_compare, state_file_mkkey, 1303 FALSE); 1304 1305 rfs4_lo_state_cache_time *= rfs4_lease_time; 1306 rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state, 1307 "LockStateID", 1308 rfs4_lo_state_cache_time, 1309 2, 1310 rfs4_lo_state_create, 1311 rfs4_lo_state_destroy, 1312 rfs4_lo_state_expiry, 1313 sizeof (rfs4_lo_state_t), 1314 TABSIZE, 1315 MAXTABSZ, 100); 1316 1317 rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab, 1318 "lockownerxstate", 1319 lo_state_lo_hash, 1320 lo_state_lo_compare, 1321 lo_state_lo_mkkey, TRUE); 1322 1323 rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab, 1324 "State-id", 1325 lo_state_hash, lo_state_compare, 1326 lo_state_mkkey, FALSE); 1327 1328 rfs4_lockowner_cache_time *= rfs4_lease_time; 1329 1330 rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state, 1331 "Lockowner", 1332 rfs4_lockowner_cache_time, 1333 2, 1334 rfs4_lockowner_create, 1335 rfs4_lockowner_destroy, 1336 rfs4_lockowner_expiry, 1337 sizeof (rfs4_lockowner_t), 1338 TABSIZE, 1339 MAXTABSZ, 100); 1340 1341 rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab, 1342 "lock_owner4", lockowner_hash, 1343 lockowner_compare, 1344 lockowner_mkkey, TRUE); 1345 1346 rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab, 1347 "pid", pid_hash, 1348 pid_compare, pid_mkkey, 1349 FALSE); 1350 1351 rfs4_file_cache_time *= rfs4_lease_time; 1352 rfs4_file_tab = rfs4_table_create(rfs4_server_state, 1353 "File", 1354 rfs4_file_cache_time, 1355 1, 1356 rfs4_file_create, 1357 rfs4_file_destroy, 1358 NULL, 1359 sizeof (rfs4_file_t), 1360 TABSIZE, 1361 MAXTABSZ, -1); 1362 1363 rfs4_file_idx = rfs4_index_create(rfs4_file_tab, 1364 "Filehandle", file_hash, 1365 file_compare, file_mkkey, TRUE); 1366 1367 rfs4_deleg_state_cache_time *= rfs4_lease_time; 1368 rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state, 1369 "DelegStateID", 1370 rfs4_deleg_state_cache_time, 1371 2, 1372 rfs4_deleg_state_create, 1373 rfs4_deleg_state_destroy, 1374 rfs4_deleg_state_expiry, 1375 sizeof (rfs4_deleg_state_t), 1376 TABSIZE, 1377 MAXTABSZ, 100); 1378 rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab, 1379 "DelegByFileClient", 1380 deleg_hash, 1381 deleg_compare, 1382 deleg_mkkey, TRUE); 1383 1384 rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab, 1385 "DelegState", 1386 deleg_state_hash, 1387 deleg_state_compare, 1388 deleg_state_mkkey, FALSE); 1389 1390 /* 1391 * Init the stable storage. 1392 */ 1393 rfs4_ss_init(); 1394 1395 rfs4_client_clrst = rfs4_clear_client_state; 1396 1397 mutex_exit(&rfs4_state_lock); 1398 } 1399 1400 1401 /* 1402 * Used at server shutdown to cleanup all of the NFSv4 server's structures 1403 * and other state. 1404 */ 1405 void 1406 rfs4_state_fini() 1407 { 1408 rfs4_database_t *dbp; 1409 1410 mutex_enter(&rfs4_state_lock); 1411 1412 if (rfs4_server_state == NULL) { 1413 mutex_exit(&rfs4_state_lock); 1414 return; 1415 } 1416 1417 rfs4_client_clrst = NULL; 1418 1419 rfs4_set_deleg_policy(SRV_NEVER_DELEGATE); 1420 dbp = rfs4_server_state; 1421 rfs4_server_state = NULL; 1422 1423 /* 1424 * Cleanup the CPR callback. 1425 */ 1426 if (cpr_id) 1427 (void) callb_delete(cpr_id); 1428 1429 rw_destroy(&rfs4_findclient_lock); 1430 1431 /* First stop all of the reaper threads in the database */ 1432 rfs4_database_shutdown(dbp); 1433 /* clean up any dangling stable storage structures */ 1434 rfs4_ss_fini(); 1435 /* Now actually destroy/release the database and its tables */ 1436 rfs4_database_destroy(dbp); 1437 1438 /* Reset the cache timers for next time */ 1439 rfs4_client_cache_time = 0; 1440 rfs4_openowner_cache_time = 0; 1441 rfs4_state_cache_time = 0; 1442 rfs4_lo_state_cache_time = 0; 1443 rfs4_lockowner_cache_time = 0; 1444 rfs4_file_cache_time = 0; 1445 rfs4_deleg_state_cache_time = 0; 1446 1447 mutex_exit(&rfs4_state_lock); 1448 1449 /* destroy server instances and current instance ptr */ 1450 rfs4_servinst_destroy_all(); 1451 1452 /* reset the "first NFSv4 request" status */ 1453 rfs4_seen_first_compound = 0; 1454 1455 /* DSS: distributed stable storage */ 1456 if (rfs4_dss_oldpaths) 1457 nvlist_free(rfs4_dss_oldpaths); 1458 if (rfs4_dss_paths) 1459 nvlist_free(rfs4_dss_paths); 1460 rfs4_dss_paths = rfs4_dss_oldpaths = NULL; 1461 } 1462 1463 typedef union { 1464 struct { 1465 uint32_t start_time; 1466 uint32_t c_id; 1467 } impl_id; 1468 clientid4 id4; 1469 } cid; 1470 1471 static int foreign_stateid(stateid_t *id); 1472 static int foreign_clientid(cid *cidp); 1473 static void embed_nodeid(cid *cidp); 1474 1475 typedef union { 1476 struct { 1477 uint32_t c_id; 1478 uint32_t gen_num; 1479 } cv_impl; 1480 verifier4 confirm_verf; 1481 } scid_confirm_verf; 1482 1483 static uint32_t 1484 clientid_hash(void *key) 1485 { 1486 cid *idp = key; 1487 1488 return (idp->impl_id.c_id); 1489 } 1490 1491 static bool_t 1492 clientid_compare(rfs4_entry_t entry, void *key) 1493 { 1494 rfs4_client_t *client = (rfs4_client_t *)entry; 1495 clientid4 *idp = key; 1496 1497 return (*idp == client->clientid); 1498 } 1499 1500 static void * 1501 clientid_mkkey(rfs4_entry_t entry) 1502 { 1503 rfs4_client_t *client = (rfs4_client_t *)entry; 1504 1505 return (&client->clientid); 1506 } 1507 1508 static uint32_t 1509 nfsclnt_hash(void *key) 1510 { 1511 nfs_client_id4 *client = key; 1512 int i; 1513 uint32_t hash = 0; 1514 1515 for (i = 0; i < client->id_len; i++) { 1516 hash <<= 1; 1517 hash += (uint_t)client->id_val[i]; 1518 } 1519 return (hash); 1520 } 1521 1522 1523 static bool_t 1524 nfsclnt_compare(rfs4_entry_t entry, void *key) 1525 { 1526 rfs4_client_t *client = (rfs4_client_t *)entry; 1527 nfs_client_id4 *nfs_client = key; 1528 1529 if (client->nfs_client.id_len != nfs_client->id_len) 1530 return (FALSE); 1531 1532 return (bcmp(client->nfs_client.id_val, nfs_client->id_val, 1533 nfs_client->id_len) == 0); 1534 } 1535 1536 static void * 1537 nfsclnt_mkkey(rfs4_entry_t entry) 1538 { 1539 rfs4_client_t *client = (rfs4_client_t *)entry; 1540 1541 return (&client->nfs_client); 1542 } 1543 1544 static bool_t 1545 rfs4_client_expiry(rfs4_entry_t u_entry) 1546 { 1547 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1548 bool_t cp_expired; 1549 1550 if (rfs4_dbe_is_invalid(cp->dbe)) 1551 return (TRUE); 1552 /* 1553 * If the sysadmin has used clear_locks for this 1554 * entry then forced_expire will be set and we 1555 * want this entry to be reaped. Or the entry 1556 * has exceeded its lease period. 1557 */ 1558 cp_expired = (cp->forced_expire || 1559 (gethrestime_sec() - cp->last_access 1560 > rfs4_lease_time)); 1561 1562 if (!cp->ss_remove && cp_expired) 1563 cp->ss_remove = 1; 1564 return (cp_expired); 1565 } 1566 1567 /* 1568 * Remove the leaf file from all distributed stable storage paths. 1569 */ 1570 static void 1571 rfs4_dss_remove_cpleaf(rfs4_client_t *cp) 1572 { 1573 char *leaf = cp->ss_pn->leaf; 1574 1575 rfs4_dss_remove_leaf(cp->server_instance, NFS4_DSS_STATE_LEAF, leaf); 1576 } 1577 1578 static void 1579 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf) 1580 { 1581 int i, npaths = sip->dss_npaths; 1582 1583 for (i = 0; i < npaths; i++) { 1584 rfs4_dss_path_t *dss_path = sip->dss_paths[i]; 1585 char *path, *dir; 1586 size_t pathlen; 1587 1588 /* the HA-NFSv4 path might have been failed-over away from us */ 1589 if (dss_path == NULL) 1590 continue; 1591 1592 dir = dss_path->path; 1593 1594 /* allow 3 extra bytes for two '/' & a NUL */ 1595 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3; 1596 path = kmem_alloc(pathlen, KM_SLEEP); 1597 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf); 1598 1599 (void) vn_remove(path, UIO_SYSSPACE, RMFILE); 1600 1601 kmem_free(path, pathlen); 1602 } 1603 } 1604 1605 static void 1606 rfs4_client_destroy(rfs4_entry_t u_entry) 1607 { 1608 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1609 1610 mutex_destroy(cp->cbinfo.cb_lock); 1611 cv_destroy(cp->cbinfo.cb_cv); 1612 cv_destroy(cp->cbinfo.cb_cv_nullcaller); 1613 1614 /* free callback info */ 1615 rfs4_cbinfo_free(&cp->cbinfo); 1616 1617 if (cp->cp_confirmed) 1618 rfs4_client_rele(cp->cp_confirmed); 1619 1620 if (cp->ss_pn) { 1621 /* check if the stable storage files need to be removed */ 1622 if (cp->ss_remove) 1623 rfs4_dss_remove_cpleaf(cp); 1624 rfs4_ss_pnfree(cp->ss_pn); 1625 } 1626 1627 /* Free the client supplied client id */ 1628 kmem_free(cp->nfs_client.id_val, cp->nfs_client.id_len); 1629 1630 if (cp->sysidt != LM_NOSYSID) 1631 lm_free_sysidt(cp->sysidt); 1632 } 1633 1634 static bool_t 1635 rfs4_client_create(rfs4_entry_t u_entry, void *arg) 1636 { 1637 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1638 nfs_client_id4 *client = (nfs_client_id4 *)arg; 1639 cid *cidp; 1640 scid_confirm_verf *scvp; 1641 1642 /* Get a clientid to give to the client */ 1643 cidp = (cid *)&cp->clientid; 1644 cidp->impl_id.start_time = rfs4_start_time; 1645 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->dbe); 1646 1647 /* If we are booted as a cluster node, embed our nodeid */ 1648 if (cluster_bootflags & CLUSTER_BOOTED) 1649 embed_nodeid(cidp); 1650 1651 /* Allocate and copy client's client id value */ 1652 cp->nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP); 1653 cp->nfs_client.id_len = client->id_len; 1654 bcopy(client->id_val, cp->nfs_client.id_val, client->id_len); 1655 cp->nfs_client.verifier = client->verifier; 1656 1657 /* Init the value for the SETCLIENTID_CONFIRM verifier */ 1658 scvp = (scid_confirm_verf *)&cp->confirm_verf; 1659 scvp->cv_impl.c_id = cidp->impl_id.c_id; 1660 scvp->cv_impl.gen_num = 0; 1661 1662 /* An F_UNLKSYS has been done for this client */ 1663 cp->unlksys_completed = FALSE; 1664 1665 /* We need the client to ack us */ 1666 cp->need_confirm = TRUE; 1667 cp->cp_confirmed = NULL; 1668 1669 /* TRUE all the time until the callback path actually fails */ 1670 cp->cbinfo.cb_notified_of_cb_path_down = TRUE; 1671 1672 /* Initialize the access time to now */ 1673 cp->last_access = gethrestime_sec(); 1674 1675 cp->cr_set = NULL; 1676 /* Initialize list for insque/remque */ 1677 cp->openownerlist.next = cp->openownerlist.prev = &cp->openownerlist; 1678 cp->openownerlist.oop = NULL; /* This is not an openowner */ 1679 1680 cp->sysidt = LM_NOSYSID; 1681 1682 cp->clientdeleglist.next = cp->clientdeleglist.prev = 1683 &cp->clientdeleglist; 1684 cp->clientdeleglist.dsp = NULL; 1685 1686 /* set up the callback control structure */ 1687 cp->cbinfo.cb_state = CB_UNINIT; 1688 mutex_init(cp->cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL); 1689 cv_init(cp->cbinfo.cb_cv, NULL, CV_DEFAULT, NULL); 1690 cv_init(cp->cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL); 1691 1692 /* 1693 * Associate the client_t with the current server instance. 1694 * The hold is solely to satisfy the calling requirement of 1695 * rfs4_servinst_assign(). In this case it's not strictly necessary. 1696 */ 1697 rfs4_dbe_hold(cp->dbe); 1698 rfs4_servinst_assign(cp, rfs4_cur_servinst); 1699 rfs4_dbe_rele(cp->dbe); 1700 1701 return (TRUE); 1702 } 1703 1704 /* 1705 * Caller wants to generate/update the setclientid_confirm verifier 1706 * associated with a client. This is done during the SETCLIENTID 1707 * processing. 1708 */ 1709 void 1710 rfs4_client_scv_next(rfs4_client_t *cp) 1711 { 1712 scid_confirm_verf *scvp; 1713 1714 /* Init the value for the SETCLIENTID_CONFIRM verifier */ 1715 scvp = (scid_confirm_verf *)&cp->confirm_verf; 1716 scvp->cv_impl.gen_num++; 1717 } 1718 1719 void 1720 rfs4_client_rele(rfs4_client_t *cp) 1721 { 1722 rfs4_dbe_rele(cp->dbe); 1723 } 1724 1725 rfs4_client_t * 1726 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp) 1727 { 1728 rfs4_client_t *cp; 1729 1730 1731 if (oldcp) { 1732 rw_enter(&rfs4_findclient_lock, RW_WRITER); 1733 rfs4_dbe_hide(oldcp->dbe); 1734 } else { 1735 rw_enter(&rfs4_findclient_lock, RW_READER); 1736 } 1737 1738 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client, 1739 create, (void *)client, RFS4_DBS_VALID); 1740 1741 if (oldcp) 1742 rfs4_dbe_unhide(oldcp->dbe); 1743 1744 rw_exit(&rfs4_findclient_lock); 1745 1746 return (cp); 1747 } 1748 1749 rfs4_client_t * 1750 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed) 1751 { 1752 rfs4_client_t *cp; 1753 bool_t create = FALSE; 1754 cid *cidp = (cid *)&clientid; 1755 1756 /* If we're a cluster and the nodeid isn't right, short-circuit */ 1757 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp)) 1758 return (NULL); 1759 1760 rw_enter(&rfs4_findclient_lock, RW_READER); 1761 1762 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid, 1763 &create, NULL, RFS4_DBS_VALID); 1764 1765 rw_exit(&rfs4_findclient_lock); 1766 1767 if (cp && cp->need_confirm && find_unconfirmed == FALSE) { 1768 rfs4_client_rele(cp); 1769 return (NULL); 1770 } else { 1771 return (cp); 1772 } 1773 } 1774 1775 bool_t 1776 rfs4_lease_expired(rfs4_client_t *cp) 1777 { 1778 bool_t rc; 1779 1780 rfs4_dbe_lock(cp->dbe); 1781 1782 /* 1783 * If the admin has executed clear_locks for this 1784 * client id, force expire will be set, so no need 1785 * to calculate anything because it's "outa here". 1786 */ 1787 if (cp->forced_expire) { 1788 rc = TRUE; 1789 } else { 1790 rc = (gethrestime_sec() - cp->last_access > rfs4_lease_time); 1791 } 1792 1793 /* 1794 * If the lease has expired we will also want 1795 * to remove any stable storage state data. So 1796 * mark the client id accordingly. 1797 */ 1798 if (!cp->ss_remove) 1799 cp->ss_remove = (rc == TRUE); 1800 1801 rfs4_dbe_unlock(cp->dbe); 1802 1803 return (rc); 1804 } 1805 1806 void 1807 rfs4_update_lease(rfs4_client_t *cp) 1808 { 1809 rfs4_dbe_lock(cp->dbe); 1810 if (!cp->forced_expire) 1811 cp->last_access = gethrestime_sec(); 1812 rfs4_dbe_unlock(cp->dbe); 1813 } 1814 1815 1816 static bool_t 1817 EQOPENOWNER(open_owner4 *a, open_owner4 *b) 1818 { 1819 bool_t rc; 1820 1821 if (a->clientid != b->clientid) 1822 return (FALSE); 1823 1824 if (a->owner_len != b->owner_len) 1825 return (FALSE); 1826 1827 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0); 1828 1829 return (rc); 1830 } 1831 1832 static uint_t 1833 openowner_hash(void *key) 1834 { 1835 int i; 1836 open_owner4 *openowner = key; 1837 uint_t hash = 0; 1838 1839 for (i = 0; i < openowner->owner_len; i++) { 1840 hash <<= 4; 1841 hash += (uint_t)openowner->owner_val[i]; 1842 } 1843 hash += (uint_t)openowner->clientid; 1844 hash |= (openowner->clientid >> 32); 1845 1846 return (hash); 1847 } 1848 1849 static bool_t 1850 openowner_compare(rfs4_entry_t u_entry, void *key) 1851 { 1852 rfs4_openowner_t *op = (rfs4_openowner_t *)u_entry; 1853 open_owner4 *arg = key; 1854 1855 return (EQOPENOWNER(&op->owner, arg)); 1856 } 1857 1858 void * 1859 openowner_mkkey(rfs4_entry_t u_entry) 1860 { 1861 rfs4_openowner_t *op = (rfs4_openowner_t *)u_entry; 1862 1863 return (&op->owner); 1864 } 1865 1866 static bool_t 1867 rfs4_openowner_expiry(rfs4_entry_t u_entry) 1868 { 1869 rfs4_openowner_t *op = (rfs4_openowner_t *)u_entry; 1870 1871 if (rfs4_dbe_is_invalid(op->dbe)) 1872 return (TRUE); 1873 return ((gethrestime_sec() - op->client->last_access 1874 > rfs4_lease_time)); 1875 } 1876 1877 static void 1878 rfs4_openowner_destroy(rfs4_entry_t u_entry) 1879 { 1880 rfs4_openowner_t *op = (rfs4_openowner_t *)u_entry; 1881 1882 rfs4_sw_destroy(&op->oo_sw); 1883 1884 /* Remove open owner from client's lists of open owners */ 1885 rfs4_dbe_lock(op->client->dbe); 1886 1887 remque(&op->openownerlist); 1888 op->openownerlist.next = op->openownerlist.prev = &op->openownerlist; 1889 1890 rfs4_dbe_unlock(op->client->dbe); 1891 1892 /* One less reference to the client */ 1893 rfs4_client_rele(op->client); 1894 op->client = NULL; 1895 1896 /* Free the last reply for this lock owner */ 1897 rfs4_free_reply(op->reply); 1898 1899 if (op->reply_fh.nfs_fh4_val) { 1900 kmem_free(op->reply_fh.nfs_fh4_val, op->reply_fh.nfs_fh4_len); 1901 op->reply_fh.nfs_fh4_val = NULL; 1902 op->reply_fh.nfs_fh4_len = 0; 1903 } 1904 1905 /* Free the lock owner id */ 1906 kmem_free(op->owner.owner_val, op->owner.owner_len); 1907 } 1908 1909 void 1910 rfs4_openowner_rele(rfs4_openowner_t *op) 1911 { 1912 rfs4_dbe_rele(op->dbe); 1913 } 1914 1915 static bool_t 1916 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg) 1917 { 1918 rfs4_openowner_t *op = (rfs4_openowner_t *)u_entry; 1919 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg; 1920 open_owner4 *openowner = &argp->owner; 1921 seqid4 seqid = argp->open_seqid; 1922 rfs4_client_t *cp; 1923 bool_t create = FALSE; 1924 1925 rw_enter(&rfs4_findclient_lock, RW_READER); 1926 1927 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, 1928 &openowner->clientid, 1929 &create, NULL, RFS4_DBS_VALID); 1930 1931 rw_exit(&rfs4_findclient_lock); 1932 1933 if (cp == NULL) 1934 return (FALSE); 1935 1936 op->reply_fh.nfs_fh4_len = 0; 1937 op->reply_fh.nfs_fh4_val = NULL; 1938 1939 op->owner.clientid = openowner->clientid; 1940 op->owner.owner_val = 1941 kmem_alloc(openowner->owner_len, KM_SLEEP); 1942 1943 bcopy(openowner->owner_val, 1944 op->owner.owner_val, openowner->owner_len); 1945 1946 op->owner.owner_len = openowner->owner_len; 1947 1948 op->need_confirm = TRUE; 1949 1950 rfs4_sw_init(&op->oo_sw); 1951 1952 op->open_seqid = seqid; 1953 bzero(op->reply, sizeof (nfs_resop4)); 1954 op->client = cp; 1955 op->cr_set = NULL; 1956 /* Init lists for remque/insque */ 1957 op->ownerstateids.next = op->ownerstateids.prev = &op->ownerstateids; 1958 op->ownerstateids.sp = NULL; /* NULL since this is the state list */ 1959 op->openownerlist.next = op->openownerlist.prev = &op->openownerlist; 1960 op->openownerlist.oop = op; /* ourselves */ 1961 1962 /* Insert openowner into client's open owner list */ 1963 rfs4_dbe_lock(cp->dbe); 1964 1965 insque(&op->openownerlist, cp->openownerlist.prev); 1966 1967 rfs4_dbe_unlock(cp->dbe); 1968 1969 return (TRUE); 1970 } 1971 1972 rfs4_openowner_t * 1973 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid) 1974 { 1975 rfs4_openowner_t *op; 1976 rfs4_openowner_t arg; 1977 1978 arg.owner = *openowner; 1979 arg.open_seqid = seqid; 1980 op = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner, 1981 create, &arg, RFS4_DBS_VALID); 1982 1983 return (op); 1984 } 1985 1986 void 1987 rfs4_update_open_sequence(rfs4_openowner_t *op) 1988 { 1989 1990 rfs4_dbe_lock(op->dbe); 1991 1992 op->open_seqid++; 1993 1994 rfs4_dbe_unlock(op->dbe); 1995 } 1996 1997 void 1998 rfs4_update_open_resp(rfs4_openowner_t *op, nfs_resop4 *resp, nfs_fh4 *fh) 1999 { 2000 2001 rfs4_dbe_lock(op->dbe); 2002 2003 rfs4_free_reply(op->reply); 2004 2005 rfs4_copy_reply(op->reply, resp); 2006 2007 /* Save the filehandle if provided and free if not used */ 2008 if (resp->nfs_resop4_u.opopen.status == NFS4_OK && 2009 fh && fh->nfs_fh4_len) { 2010 if (op->reply_fh.nfs_fh4_val == NULL) 2011 op->reply_fh.nfs_fh4_val = 2012 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP); 2013 nfs_fh4_copy(fh, &op->reply_fh); 2014 } else { 2015 if (op->reply_fh.nfs_fh4_val) { 2016 kmem_free(op->reply_fh.nfs_fh4_val, 2017 op->reply_fh.nfs_fh4_len); 2018 op->reply_fh.nfs_fh4_val = NULL; 2019 op->reply_fh.nfs_fh4_len = 0; 2020 } 2021 } 2022 2023 rfs4_dbe_unlock(op->dbe); 2024 } 2025 2026 static bool_t 2027 lockowner_compare(rfs4_entry_t u_entry, void *key) 2028 { 2029 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2030 lock_owner4 *b = (lock_owner4 *)key; 2031 2032 if (lo->owner.clientid != b->clientid) 2033 return (FALSE); 2034 2035 if (lo->owner.owner_len != b->owner_len) 2036 return (FALSE); 2037 2038 return (bcmp(lo->owner.owner_val, b->owner_val, 2039 lo->owner.owner_len) == 0); 2040 } 2041 2042 void * 2043 lockowner_mkkey(rfs4_entry_t u_entry) 2044 { 2045 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2046 2047 return (&lo->owner); 2048 } 2049 2050 static uint32_t 2051 lockowner_hash(void *key) 2052 { 2053 int i; 2054 lock_owner4 *lockowner = key; 2055 uint_t hash = 0; 2056 2057 for (i = 0; i < lockowner->owner_len; i++) { 2058 hash <<= 4; 2059 hash += (uint_t)lockowner->owner_val[i]; 2060 } 2061 hash += (uint_t)lockowner->clientid; 2062 hash |= (lockowner->clientid >> 32); 2063 2064 return (hash); 2065 } 2066 2067 static uint32_t 2068 pid_hash(void *key) 2069 { 2070 return ((uint32_t)(uintptr_t)key); 2071 } 2072 2073 static void * 2074 pid_mkkey(rfs4_entry_t u_entry) 2075 { 2076 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2077 2078 return ((void *)(uintptr_t)lo->pid); 2079 } 2080 2081 static bool_t 2082 pid_compare(rfs4_entry_t u_entry, void *key) 2083 { 2084 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2085 2086 return (lo->pid == (pid_t)(uintptr_t)key); 2087 } 2088 2089 static void 2090 rfs4_lockowner_destroy(rfs4_entry_t u_entry) 2091 { 2092 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2093 2094 /* Free the lock owner id */ 2095 kmem_free(lo->owner.owner_val, lo->owner.owner_len); 2096 rfs4_client_rele(lo->client); 2097 } 2098 2099 void 2100 rfs4_lockowner_rele(rfs4_lockowner_t *lo) 2101 { 2102 rfs4_dbe_rele(lo->dbe); 2103 } 2104 2105 /* ARGSUSED */ 2106 static bool_t 2107 rfs4_lockowner_expiry(rfs4_entry_t u_entry) 2108 { 2109 /* 2110 * Since expiry is called with no other references on 2111 * this struct, go ahead and have it removed. 2112 */ 2113 return (TRUE); 2114 } 2115 2116 static bool_t 2117 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg) 2118 { 2119 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2120 lock_owner4 *lockowner = (lock_owner4 *)arg; 2121 rfs4_client_t *cp; 2122 bool_t create = FALSE; 2123 2124 rw_enter(&rfs4_findclient_lock, RW_READER); 2125 2126 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, 2127 &lockowner->clientid, 2128 &create, NULL, RFS4_DBS_VALID); 2129 2130 rw_exit(&rfs4_findclient_lock); 2131 2132 if (cp == NULL) 2133 return (FALSE); 2134 2135 /* Reference client */ 2136 lo->client = cp; 2137 lo->owner.clientid = lockowner->clientid; 2138 lo->owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP); 2139 bcopy(lockowner->owner_val, lo->owner.owner_val, lockowner->owner_len); 2140 lo->owner.owner_len = lockowner->owner_len; 2141 lo->pid = rfs4_dbe_getid(lo->dbe); 2142 2143 return (TRUE); 2144 } 2145 2146 rfs4_lockowner_t * 2147 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create) 2148 { 2149 rfs4_lockowner_t *lo; 2150 2151 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner, 2152 create, lockowner, RFS4_DBS_VALID); 2153 2154 return (lo); 2155 } 2156 2157 rfs4_lockowner_t * 2158 rfs4_findlockowner_by_pid(pid_t pid) 2159 { 2160 rfs4_lockowner_t *lo; 2161 bool_t create = FALSE; 2162 2163 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx, 2164 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID); 2165 2166 return (lo); 2167 } 2168 2169 2170 static uint32_t 2171 file_hash(void *key) 2172 { 2173 return (ADDRHASH(key)); 2174 } 2175 2176 static void * 2177 file_mkkey(rfs4_entry_t u_entry) 2178 { 2179 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2180 2181 return (fp->vp); 2182 } 2183 2184 static bool_t 2185 file_compare(rfs4_entry_t u_entry, void *key) 2186 { 2187 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2188 2189 return (fp->vp == (vnode_t *)key); 2190 } 2191 2192 static void 2193 rfs4_file_destroy(rfs4_entry_t u_entry) 2194 { 2195 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2196 2197 ASSERT(fp->delegationlist.next == &fp->delegationlist); 2198 if (fp->filehandle.nfs_fh4_val) 2199 kmem_free(fp->filehandle.nfs_fh4_val, 2200 fp->filehandle.nfs_fh4_len); 2201 cv_destroy(fp->dinfo->recall_cv); 2202 if (fp->vp) { 2203 vnode_t *vp = fp->vp; 2204 2205 mutex_enter(&vp->v_lock); 2206 (void) vsd_set(vp, nfs4_srv_vkey, NULL); 2207 mutex_exit(&vp->v_lock); 2208 VN_RELE(vp); 2209 fp->vp = NULL; 2210 } 2211 rw_destroy(&fp->file_rwlock); 2212 } 2213 2214 /* 2215 * Used to unlock the underlying dbe struct only 2216 */ 2217 void 2218 rfs4_file_rele(rfs4_file_t *fp) 2219 { 2220 rfs4_dbe_rele(fp->dbe); 2221 } 2222 2223 /* 2224 * Used to unlock the file rw lock and the file's dbe entry 2225 * Only used to pair with rfs4_findfile_withlock() 2226 */ 2227 void 2228 rfs4_file_rele_withunlock(rfs4_file_t *fp) 2229 { 2230 rw_exit(&fp->file_rwlock); 2231 rfs4_dbe_rele(fp->dbe); 2232 } 2233 2234 typedef struct { 2235 vnode_t *vp; 2236 nfs_fh4 *fh; 2237 } rfs4_fcreate_arg; 2238 2239 static bool_t 2240 rfs4_file_create(rfs4_entry_t u_entry, void *arg) 2241 { 2242 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2243 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg; 2244 vnode_t *vp = ap->vp; 2245 nfs_fh4 *fh = ap->fh; 2246 2247 VN_HOLD(vp); 2248 2249 fp->filehandle.nfs_fh4_len = 0; 2250 fp->filehandle.nfs_fh4_val = NULL; 2251 ASSERT(fh && fh->nfs_fh4_len); 2252 if (fh && fh->nfs_fh4_len) { 2253 fp->filehandle.nfs_fh4_val = 2254 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP); 2255 nfs_fh4_copy(fh, &fp->filehandle); 2256 } 2257 fp->vp = vp; 2258 2259 /* Init list for remque/insque */ 2260 fp->delegationlist.next = fp->delegationlist.prev = 2261 &fp->delegationlist; 2262 fp->delegationlist.dsp = NULL; /* NULL since this is state list */ 2263 2264 fp->share_deny = fp->share_access = fp->access_read = 0; 2265 fp->access_write = fp->deny_read = fp->deny_write = 0; 2266 2267 mutex_init(fp->dinfo->recall_lock, NULL, MUTEX_DEFAULT, NULL); 2268 cv_init(fp->dinfo->recall_cv, NULL, CV_DEFAULT, NULL); 2269 2270 fp->dinfo->dtype = OPEN_DELEGATE_NONE; 2271 2272 rw_init(&fp->file_rwlock, NULL, RW_DEFAULT, NULL); 2273 2274 mutex_enter(&vp->v_lock); 2275 if (vsd_set(vp, nfs4_srv_vkey, (void *)fp)) { 2276 ASSERT(FALSE); 2277 cmn_err(CE_WARN, "rfs4_file_create: vsd_set failed."); 2278 } 2279 mutex_exit(&vp->v_lock); 2280 2281 return (TRUE); 2282 } 2283 2284 rfs4_file_t * 2285 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create) 2286 { 2287 rfs4_file_t *fp; 2288 rfs4_fcreate_arg arg; 2289 2290 arg.vp = vp; 2291 arg.fh = fh; 2292 2293 if (*create == TRUE) 2294 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create, 2295 &arg, RFS4_DBS_VALID); 2296 else { 2297 mutex_enter(&vp->v_lock); 2298 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey); 2299 mutex_exit(&vp->v_lock); 2300 if (fp) { 2301 rfs4_dbe_lock(fp->dbe); 2302 if (rfs4_dbe_is_invalid(fp->dbe) || 2303 (rfs4_dbe_refcnt(fp->dbe) == 0)) { 2304 rfs4_dbe_unlock(fp->dbe); 2305 fp = NULL; 2306 } else { 2307 rfs4_dbe_hold(fp->dbe); 2308 rfs4_dbe_unlock(fp->dbe); 2309 } 2310 } 2311 } 2312 return (fp); 2313 } 2314 2315 /* 2316 * Find a file in the db and once it is located, take the rw lock. 2317 * Need to check the vnode pointer and if it does not exist (it was 2318 * removed between the db location and check) redo the find. This 2319 * assumes that a file struct that has a NULL vnode pointer is marked 2320 * at 'invalid' and will not be found in the db the second time 2321 * around. 2322 */ 2323 rfs4_file_t * 2324 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create) 2325 { 2326 rfs4_file_t *fp; 2327 rfs4_fcreate_arg arg; 2328 bool_t screate = *create; 2329 2330 if (screate == FALSE) { 2331 mutex_enter(&vp->v_lock); 2332 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey); 2333 mutex_exit(&vp->v_lock); 2334 if (fp) { 2335 rfs4_dbe_lock(fp->dbe); 2336 if (rfs4_dbe_is_invalid(fp->dbe) || 2337 (rfs4_dbe_refcnt(fp->dbe) == 0)) { 2338 rfs4_dbe_unlock(fp->dbe); 2339 fp = NULL; 2340 } else { 2341 rfs4_dbe_hold(fp->dbe); 2342 rfs4_dbe_unlock(fp->dbe); 2343 rw_enter(&fp->file_rwlock, RW_WRITER); 2344 } 2345 } 2346 } else { 2347 retry: 2348 arg.vp = vp; 2349 arg.fh = fh; 2350 2351 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create, 2352 &arg, RFS4_DBS_VALID); 2353 if (fp != NULL) { 2354 rw_enter(&fp->file_rwlock, RW_WRITER); 2355 if (fp->vp == NULL) { 2356 rw_exit(&fp->file_rwlock); 2357 rfs4_file_rele(fp); 2358 *create = screate; 2359 goto retry; 2360 } 2361 } 2362 } 2363 2364 return (fp); 2365 } 2366 2367 static uint32_t 2368 lo_state_hash(void *key) 2369 { 2370 stateid_t *id = key; 2371 2372 return (id->bits.ident+id->bits.pid); 2373 } 2374 2375 static bool_t 2376 lo_state_compare(rfs4_entry_t u_entry, void *key) 2377 { 2378 rfs4_lo_state_t *lop = (rfs4_lo_state_t *)u_entry; 2379 stateid_t *id = key; 2380 bool_t rc; 2381 2382 rc = (lop->lockid.bits.boottime == id->bits.boottime && 2383 lop->lockid.bits.type == id->bits.type && 2384 lop->lockid.bits.ident == id->bits.ident && 2385 lop->lockid.bits.pid == id->bits.pid); 2386 2387 return (rc); 2388 } 2389 2390 static void * 2391 lo_state_mkkey(rfs4_entry_t u_entry) 2392 { 2393 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2394 2395 return (&lsp->lockid); 2396 } 2397 2398 static bool_t 2399 rfs4_lo_state_expiry(rfs4_entry_t u_entry) 2400 { 2401 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2402 2403 if (rfs4_dbe_is_invalid(lsp->dbe)) 2404 return (TRUE); 2405 if (lsp->state->closed) 2406 return (TRUE); 2407 return ((gethrestime_sec() - lsp->state->owner->client->last_access 2408 > rfs4_lease_time)); 2409 } 2410 2411 static void 2412 rfs4_lo_state_destroy(rfs4_entry_t u_entry) 2413 { 2414 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2415 2416 rfs4_sw_destroy(&lsp->ls_sw); 2417 2418 /* Make sure to release the file locks */ 2419 if (lsp->locks_cleaned == FALSE) { 2420 lsp->locks_cleaned = TRUE; 2421 if (lsp->locker->client->sysidt != LM_NOSYSID) { 2422 /* Is the PxFS kernel module loaded? */ 2423 if (lm_remove_file_locks != NULL) { 2424 int new_sysid; 2425 2426 /* Encode the cluster nodeid in new sysid */ 2427 new_sysid = lsp->locker->client->sysidt; 2428 lm_set_nlmid_flk(&new_sysid); 2429 2430 /* 2431 * This PxFS routine removes file locks for a 2432 * client over all nodes of a cluster. 2433 */ 2434 DTRACE_PROBE1(nfss_i_clust_rm_lck, 2435 int, new_sysid); 2436 (*lm_remove_file_locks)(new_sysid); 2437 } else { 2438 (void) cleanlocks(lsp->state->finfo->vp, 2439 lsp->locker->pid, 2440 lsp->locker->client->sysidt); 2441 } 2442 } 2443 } 2444 2445 rfs4_dbe_lock(lsp->state->dbe); 2446 2447 remque(&lsp->lockownerlist); 2448 lsp->lockownerlist.next = lsp->lockownerlist.prev = 2449 &lsp->lockownerlist; 2450 2451 rfs4_dbe_unlock(lsp->state->dbe); 2452 2453 /* Free the last reply for this state */ 2454 rfs4_free_reply(lsp->reply); 2455 2456 rfs4_lockowner_rele(lsp->locker); 2457 lsp->locker = NULL; 2458 2459 rfs4_state_rele_nounlock(lsp->state); 2460 lsp->state = NULL; 2461 } 2462 2463 static bool_t 2464 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg) 2465 { 2466 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2467 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg; 2468 rfs4_lockowner_t *lo = argp->locker; 2469 rfs4_state_t *sp = argp->state; 2470 2471 lsp->state = sp; 2472 2473 lsp->lockid = sp->stateid; 2474 lsp->lockid.bits.type = LOCKID; 2475 lsp->lockid.bits.chgseq = 0; 2476 lsp->lockid.bits.pid = lo->pid; 2477 2478 lsp->locks_cleaned = FALSE; 2479 lsp->lock_completed = FALSE; 2480 2481 rfs4_sw_init(&lsp->ls_sw); 2482 2483 /* Attached the supplied lock owner */ 2484 rfs4_dbe_hold(lo->dbe); 2485 lsp->locker = lo; 2486 2487 lsp->lockownerlist.next = lsp->lockownerlist.prev = 2488 &lsp->lockownerlist; 2489 lsp->lockownerlist.lsp = lsp; 2490 2491 rfs4_dbe_lock(sp->dbe); 2492 2493 insque(&lsp->lockownerlist, sp->lockownerlist.prev); 2494 2495 rfs4_dbe_hold(sp->dbe); 2496 2497 rfs4_dbe_unlock(sp->dbe); 2498 2499 return (TRUE); 2500 } 2501 2502 void 2503 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp) 2504 { 2505 if (unlock_fp == TRUE) 2506 rw_exit(&lsp->state->finfo->file_rwlock); 2507 rfs4_dbe_rele(lsp->dbe); 2508 } 2509 2510 static rfs4_lo_state_t * 2511 rfs4_findlo_state(stateid_t *id, bool_t lock_fp) 2512 { 2513 rfs4_lo_state_t *lsp; 2514 bool_t create = FALSE; 2515 2516 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id, 2517 &create, NULL, RFS4_DBS_VALID); 2518 if (lock_fp == TRUE && lsp != NULL) 2519 rw_enter(&lsp->state->finfo->file_rwlock, RW_READER); 2520 2521 return (lsp); 2522 } 2523 2524 2525 static uint32_t 2526 lo_state_lo_hash(void *key) 2527 { 2528 rfs4_lo_state_t *lop = key; 2529 2530 return (ADDRHASH(lop->locker) ^ ADDRHASH(lop->state)); 2531 } 2532 2533 static bool_t 2534 lo_state_lo_compare(rfs4_entry_t u_entry, void *key) 2535 { 2536 rfs4_lo_state_t *lop = (rfs4_lo_state_t *)u_entry; 2537 rfs4_lo_state_t *keyp = key; 2538 2539 return (keyp->locker == lop->locker && keyp->state == lop->state); 2540 } 2541 2542 static void * 2543 lo_state_lo_mkkey(rfs4_entry_t u_entry) 2544 { 2545 return (u_entry); 2546 } 2547 2548 rfs4_lo_state_t * 2549 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, 2550 rfs4_state_t *sp, bool_t *create) 2551 { 2552 rfs4_lo_state_t *lsp; 2553 rfs4_lo_state_t arg; 2554 2555 arg.locker = lo; 2556 arg.state = sp; 2557 2558 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg, 2559 create, &arg, RFS4_DBS_VALID); 2560 2561 return (lsp); 2562 } 2563 2564 static stateid_t 2565 get_stateid(id_t eid) 2566 { 2567 stateid_t id; 2568 2569 id.bits.boottime = rfs4_start_time; 2570 id.bits.ident = eid; 2571 id.bits.chgseq = 0; 2572 id.bits.type = 0; 2573 id.bits.pid = 0; 2574 2575 /* 2576 * If we are booted as a cluster node, embed our nodeid. 2577 * We've already done sanity checks in rfs4_client_create() so no 2578 * need to repeat them here. 2579 */ 2580 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ? 2581 clconf_get_nodeid() : 0; 2582 2583 return (id); 2584 } 2585 2586 /* 2587 * For use only when booted as a cluster node. 2588 * Returns TRUE if the embedded nodeid indicates that this stateid was 2589 * generated on another node. 2590 */ 2591 static int 2592 foreign_stateid(stateid_t *id) 2593 { 2594 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2595 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid()); 2596 } 2597 2598 /* 2599 * For use only when booted as a cluster node. 2600 * Returns TRUE if the embedded nodeid indicates that this clientid was 2601 * generated on another node. 2602 */ 2603 static int 2604 foreign_clientid(cid *cidp) 2605 { 2606 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2607 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT != 2608 (uint32_t)clconf_get_nodeid()); 2609 } 2610 2611 /* 2612 * For use only when booted as a cluster node. 2613 * Embed our cluster nodeid into the clientid. 2614 */ 2615 static void 2616 embed_nodeid(cid *cidp) 2617 { 2618 int clnodeid; 2619 /* 2620 * Currently, our state tables are small enough that their 2621 * ids will leave enough bits free for the nodeid. If the 2622 * tables become larger, we mustn't overwrite the id. 2623 * Equally, we only have room for so many bits of nodeid, so 2624 * must check that too. 2625 */ 2626 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2627 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0); 2628 clnodeid = clconf_get_nodeid(); 2629 ASSERT(clnodeid <= CLUSTER_MAX_NODEID); 2630 ASSERT(clnodeid != NODEID_UNKNOWN); 2631 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT); 2632 } 2633 2634 static uint32_t 2635 state_hash(void *key) 2636 { 2637 stateid_t *ip = (stateid_t *)key; 2638 2639 return (ip->bits.ident); 2640 } 2641 2642 static bool_t 2643 state_compare(rfs4_entry_t u_entry, void *key) 2644 { 2645 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2646 stateid_t *id = (stateid_t *)key; 2647 bool_t rc; 2648 2649 rc = (sp->stateid.bits.boottime == id->bits.boottime && 2650 sp->stateid.bits.ident == id->bits.ident); 2651 2652 return (rc); 2653 } 2654 2655 static void * 2656 state_mkkey(rfs4_entry_t u_entry) 2657 { 2658 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2659 2660 return (&sp->stateid); 2661 } 2662 2663 static void 2664 rfs4_state_destroy(rfs4_entry_t u_entry) 2665 { 2666 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2667 2668 ASSERT(&sp->lockownerlist == sp->lockownerlist.next); 2669 2670 /* release any share locks for this stateid if it's still open */ 2671 if (!sp->closed) 2672 rfs4_unshare(sp); 2673 2674 /* Were done with the file */ 2675 rfs4_file_rele(sp->finfo); 2676 sp->finfo = NULL; 2677 2678 /* And now with the openowner */ 2679 rfs4_dbe_lock(sp->owner->dbe); 2680 2681 remque(&sp->ownerstateids); 2682 sp->ownerstateids.next = sp->ownerstateids.prev = &sp->ownerstateids; 2683 2684 rfs4_dbe_unlock(sp->owner->dbe); 2685 2686 rfs4_openowner_rele(sp->owner); 2687 sp->owner = NULL; 2688 } 2689 2690 static void 2691 rfs4_state_rele_nounlock(rfs4_state_t *sp) 2692 { 2693 rfs4_dbe_rele(sp->dbe); 2694 } 2695 2696 void 2697 rfs4_state_rele(rfs4_state_t *sp) 2698 { 2699 rw_exit(&sp->finfo->file_rwlock); 2700 rfs4_dbe_rele(sp->dbe); 2701 } 2702 2703 static uint32_t 2704 deleg_hash(void *key) 2705 { 2706 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key; 2707 2708 return (ADDRHASH(dsp->client) ^ ADDRHASH(dsp->finfo)); 2709 } 2710 2711 static bool_t 2712 deleg_compare(rfs4_entry_t u_entry, void *key) 2713 { 2714 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2715 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key; 2716 2717 return (dsp->client == kdsp->client && dsp->finfo == kdsp->finfo); 2718 } 2719 2720 static void * 2721 deleg_mkkey(rfs4_entry_t u_entry) 2722 { 2723 return (u_entry); 2724 } 2725 2726 static uint32_t 2727 deleg_state_hash(void *key) 2728 { 2729 stateid_t *ip = (stateid_t *)key; 2730 2731 return (ip->bits.ident); 2732 } 2733 2734 static bool_t 2735 deleg_state_compare(rfs4_entry_t u_entry, void *key) 2736 { 2737 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2738 stateid_t *id = (stateid_t *)key; 2739 bool_t rc; 2740 2741 if (id->bits.type != DELEGID) 2742 return (FALSE); 2743 2744 rc = (dsp->delegid.bits.boottime == id->bits.boottime && 2745 dsp->delegid.bits.ident == id->bits.ident); 2746 2747 return (rc); 2748 } 2749 2750 static void * 2751 deleg_state_mkkey(rfs4_entry_t u_entry) 2752 { 2753 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2754 2755 return (&dsp->delegid); 2756 } 2757 2758 static bool_t 2759 rfs4_deleg_state_expiry(rfs4_entry_t u_entry) 2760 { 2761 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2762 2763 if (rfs4_dbe_is_invalid(dsp->dbe)) 2764 return (TRUE); 2765 2766 if ((gethrestime_sec() - dsp->client->last_access 2767 > rfs4_lease_time)) { 2768 rfs4_dbe_invalidate(dsp->dbe); 2769 return (TRUE); 2770 } 2771 2772 return (FALSE); 2773 } 2774 2775 static bool_t 2776 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp) 2777 { 2778 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2779 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->finfo; 2780 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->client; 2781 2782 rfs4_dbe_hold(fp->dbe); 2783 rfs4_dbe_hold(cp->dbe); 2784 2785 dsp->delegid = get_stateid(rfs4_dbe_getid(dsp->dbe)); 2786 dsp->delegid.bits.type = DELEGID; 2787 dsp->finfo = fp; 2788 dsp->client = cp; 2789 dsp->dtype = OPEN_DELEGATE_NONE; 2790 2791 dsp->time_granted = gethrestime_sec(); /* observability */ 2792 dsp->time_revoked = 0; 2793 2794 /* Init lists for remque/insque */ 2795 dsp->delegationlist.next = dsp->delegationlist.prev = 2796 &dsp->delegationlist; 2797 dsp->delegationlist.dsp = dsp; 2798 2799 dsp->clientdeleglist.next = dsp->clientdeleglist.prev = 2800 &dsp->clientdeleglist; 2801 dsp->clientdeleglist.dsp = dsp; 2802 2803 /* Insert state on per open owner's list */ 2804 rfs4_dbe_lock(cp->dbe); 2805 2806 insque(&dsp->clientdeleglist, cp->clientdeleglist.prev); 2807 2808 rfs4_dbe_unlock(cp->dbe); 2809 2810 return (TRUE); 2811 } 2812 2813 static void 2814 rfs4_deleg_state_destroy(rfs4_entry_t u_entry) 2815 { 2816 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2817 2818 if (&dsp->delegationlist != dsp->delegationlist.next) 2819 rfs4_return_deleg(dsp, FALSE); 2820 2821 /* Were done with the file */ 2822 rfs4_file_rele(dsp->finfo); 2823 dsp->finfo = NULL; 2824 2825 /* And now with the openowner */ 2826 rfs4_dbe_lock(dsp->client->dbe); 2827 2828 remque(&dsp->clientdeleglist); 2829 dsp->clientdeleglist.next = dsp->clientdeleglist.prev = 2830 &dsp->clientdeleglist; 2831 2832 rfs4_dbe_unlock(dsp->client->dbe); 2833 2834 rfs4_client_rele(dsp->client); 2835 dsp->client = NULL; 2836 } 2837 2838 rfs4_deleg_state_t * 2839 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create) 2840 { 2841 rfs4_deleg_state_t ds, *dsp; 2842 2843 ds.client = sp->owner->client; 2844 ds.finfo = sp->finfo; 2845 2846 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds, 2847 create, &ds, RFS4_DBS_VALID); 2848 2849 return (dsp); 2850 } 2851 2852 rfs4_deleg_state_t * 2853 rfs4_finddelegstate(stateid_t *id) 2854 { 2855 rfs4_deleg_state_t *dsp; 2856 bool_t create = FALSE; 2857 2858 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id, 2859 &create, NULL, RFS4_DBS_VALID); 2860 2861 return (dsp); 2862 } 2863 2864 void 2865 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp) 2866 { 2867 rfs4_dbe_rele(dsp->dbe); 2868 } 2869 2870 void 2871 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp) 2872 { 2873 2874 rfs4_dbe_lock(lsp->dbe); 2875 2876 /* 2877 * If we are skipping sequence id checking, this means that 2878 * this is the first lock request and therefore the sequence 2879 * id does not need to be updated. This only happens on the 2880 * first lock request for a lockowner 2881 */ 2882 if (!lsp->skip_seqid_check) 2883 lsp->seqid++; 2884 2885 rfs4_dbe_unlock(lsp->dbe); 2886 } 2887 2888 void 2889 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp) 2890 { 2891 2892 rfs4_dbe_lock(lsp->dbe); 2893 2894 rfs4_free_reply(lsp->reply); 2895 2896 rfs4_copy_reply(lsp->reply, resp); 2897 2898 rfs4_dbe_unlock(lsp->dbe); 2899 } 2900 2901 void 2902 rfs4_free_opens(rfs4_openowner_t *op, bool_t invalidate, 2903 bool_t close_of_client) 2904 { 2905 rfs4_state_t *sp; 2906 2907 rfs4_dbe_lock(op->dbe); 2908 2909 for (sp = op->ownerstateids.next->sp; sp != NULL; 2910 sp = sp->ownerstateids.next->sp) { 2911 rfs4_state_close(sp, FALSE, close_of_client, CRED()); 2912 if (invalidate == TRUE) 2913 rfs4_dbe_invalidate(sp->dbe); 2914 } 2915 2916 rfs4_dbe_unlock(op->dbe); 2917 rfs4_dbe_invalidate(op->dbe); 2918 } 2919 2920 static uint32_t 2921 state_owner_file_hash(void *key) 2922 { 2923 rfs4_state_t *sp = key; 2924 2925 return (ADDRHASH(sp->owner) ^ ADDRHASH(sp->finfo)); 2926 } 2927 2928 static bool_t 2929 state_owner_file_compare(rfs4_entry_t u_entry, void *key) 2930 { 2931 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2932 rfs4_state_t *arg = key; 2933 2934 if (sp->closed == TRUE) 2935 return (FALSE); 2936 2937 return (arg->owner == sp->owner && arg->finfo == sp->finfo); 2938 } 2939 2940 static void * 2941 state_owner_file_mkkey(rfs4_entry_t u_entry) 2942 { 2943 return (u_entry); 2944 } 2945 2946 static uint32_t 2947 state_file_hash(void *key) 2948 { 2949 return (ADDRHASH(key)); 2950 } 2951 2952 static bool_t 2953 state_file_compare(rfs4_entry_t u_entry, void *key) 2954 { 2955 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2956 rfs4_file_t *fp = key; 2957 2958 if (sp->closed == TRUE) 2959 return (FALSE); 2960 2961 return (fp == sp->finfo); 2962 } 2963 2964 static void * 2965 state_file_mkkey(rfs4_entry_t u_entry) 2966 { 2967 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2968 2969 return (sp->finfo); 2970 } 2971 2972 rfs4_state_t * 2973 rfs4_findstate_by_owner_file(rfs4_openowner_t *op, rfs4_file_t *file, 2974 bool_t *create) 2975 { 2976 rfs4_state_t *sp; 2977 rfs4_state_t key; 2978 2979 key.owner = op; 2980 key.finfo = file; 2981 2982 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key, 2983 create, &key, RFS4_DBS_VALID); 2984 2985 return (sp); 2986 } 2987 2988 /* This returns ANY state struct that refers to this file */ 2989 static rfs4_state_t * 2990 rfs4_findstate_by_file(rfs4_file_t *fp) 2991 { 2992 bool_t create = FALSE; 2993 2994 return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp, 2995 &create, fp, RFS4_DBS_VALID)); 2996 } 2997 2998 static bool_t 2999 rfs4_state_expiry(rfs4_entry_t u_entry) 3000 { 3001 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3002 3003 if (rfs4_dbe_is_invalid(sp->dbe)) 3004 return (TRUE); 3005 3006 if (sp->closed == TRUE && 3007 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->dbe)) 3008 > rfs4_lease_time)) 3009 return (TRUE); 3010 3011 return ((gethrestime_sec() - sp->owner->client->last_access 3012 > rfs4_lease_time)); 3013 } 3014 3015 static bool_t 3016 rfs4_state_create(rfs4_entry_t u_entry, void *argp) 3017 { 3018 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3019 rfs4_file_t *fp = ((rfs4_state_t *)argp)->finfo; 3020 rfs4_openowner_t *op = ((rfs4_state_t *)argp)->owner; 3021 3022 rfs4_dbe_hold(fp->dbe); 3023 rfs4_dbe_hold(op->dbe); 3024 sp->stateid = get_stateid(rfs4_dbe_getid(sp->dbe)); 3025 sp->stateid.bits.type = OPENID; 3026 sp->owner = op; 3027 sp->finfo = fp; 3028 3029 /* Init lists for remque/insque */ 3030 sp->ownerstateids.next = sp->ownerstateids.prev = &sp->ownerstateids; 3031 sp->ownerstateids.sp = sp; 3032 sp->lockownerlist.next = sp->lockownerlist.prev = &sp->lockownerlist; 3033 sp->lockownerlist.lsp = NULL; 3034 3035 /* Insert state on per open owner's list */ 3036 rfs4_dbe_lock(op->dbe); 3037 3038 insque(&sp->ownerstateids, op->ownerstateids.prev); 3039 3040 rfs4_dbe_unlock(op->dbe); 3041 3042 return (TRUE); 3043 } 3044 3045 static rfs4_state_t * 3046 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, 3047 bool_t lock_fp) 3048 { 3049 rfs4_state_t *sp; 3050 bool_t create = FALSE; 3051 3052 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id, 3053 &create, NULL, find_invalid); 3054 if (lock_fp == TRUE && sp != NULL) 3055 rw_enter(&sp->finfo->file_rwlock, RW_READER); 3056 3057 return (sp); 3058 } 3059 3060 void 3061 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, 3062 bool_t close_of_client, cred_t *cr) 3063 { 3064 /* Remove the associated lo_state owners */ 3065 if (!lock_held) 3066 rfs4_dbe_lock(sp->dbe); 3067 3068 /* 3069 * If refcnt == 0, the dbe is about to be destroyed. 3070 * lock state will be released by the reaper thread. 3071 */ 3072 3073 if (rfs4_dbe_refcnt(sp->dbe) > 0) { 3074 if (sp->closed == FALSE) { 3075 sp->closed = TRUE; 3076 3077 rfs4_release_share_lock_state(sp, cr, close_of_client); 3078 } 3079 } 3080 3081 if (!lock_held) 3082 rfs4_dbe_unlock(sp->dbe); 3083 } 3084 3085 /* 3086 * Remove all state associated with the given client. 3087 */ 3088 void 3089 rfs4_client_state_remove(rfs4_client_t *cp) 3090 { 3091 rfs4_openowner_t *oop; 3092 3093 rfs4_dbe_lock(cp->dbe); 3094 3095 for (oop = cp->openownerlist.next->oop; oop != NULL; 3096 oop = oop->openownerlist.next->oop) { 3097 rfs4_free_opens(oop, TRUE, TRUE); 3098 } 3099 3100 rfs4_dbe_unlock(cp->dbe); 3101 } 3102 3103 void 3104 rfs4_client_close(rfs4_client_t *cp) 3105 { 3106 /* Mark client as going away. */ 3107 rfs4_dbe_lock(cp->dbe); 3108 rfs4_dbe_invalidate(cp->dbe); 3109 rfs4_dbe_unlock(cp->dbe); 3110 3111 rfs4_client_state_remove(cp); 3112 3113 /* Release the client */ 3114 rfs4_client_rele(cp); 3115 } 3116 3117 nfsstat4 3118 rfs4_check_clientid(clientid4 *cp, int setclid_confirm) 3119 { 3120 cid *cidp = (cid *) cp; 3121 3122 /* 3123 * If we are booted as a cluster node, check the embedded nodeid. 3124 * If it indicates that this clientid was generated on another node, 3125 * inform the client accordingly. 3126 */ 3127 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp)) 3128 return (NFS4ERR_STALE_CLIENTID); 3129 3130 /* 3131 * If the server start time matches the time provided 3132 * by the client (via the clientid) and this is NOT a 3133 * setclientid_confirm then return EXPIRED. 3134 */ 3135 if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time) 3136 return (NFS4ERR_EXPIRED); 3137 3138 return (NFS4ERR_STALE_CLIENTID); 3139 } 3140 3141 /* 3142 * This is used when a stateid has not been found amongst the 3143 * current server's state. Check the stateid to see if it 3144 * was from this server instantiation or not. 3145 */ 3146 static nfsstat4 3147 what_stateid_error(stateid_t *id, stateid_type_t type) 3148 { 3149 /* If we are booted as a cluster node, was stateid locally generated? */ 3150 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3151 return (NFS4ERR_STALE_STATEID); 3152 3153 /* If types don't match then no use checking further */ 3154 if (type != id->bits.type) 3155 return (NFS4ERR_BAD_STATEID); 3156 3157 /* From a previous server instantiation, return STALE */ 3158 if (id->bits.boottime < rfs4_start_time) 3159 return (NFS4ERR_STALE_STATEID); 3160 3161 /* 3162 * From this server but the state is most likely beyond lease 3163 * timeout: return NFS4ERR_EXPIRED. However, there is the 3164 * case of a delegation stateid. For delegations, there is a 3165 * case where the state can be removed without the client's 3166 * knowledge/consent: revocation. In the case of delegation 3167 * revocation, the delegation state will be removed and will 3168 * not be found. If the client does something like a 3169 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid 3170 * that has been revoked, the server should return BAD_STATEID 3171 * instead of the more common EXPIRED error. 3172 */ 3173 if (id->bits.boottime == rfs4_start_time) { 3174 if (type == DELEGID) 3175 return (NFS4ERR_BAD_STATEID); 3176 else 3177 return (NFS4ERR_EXPIRED); 3178 } 3179 3180 return (NFS4ERR_BAD_STATEID); 3181 } 3182 3183 /* 3184 * Used later on to find the various state structs. When called from 3185 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is 3186 * taken (it is not needed) and helps on the read/write path with 3187 * respect to performance. 3188 */ 3189 static nfsstat4 3190 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp, 3191 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp) 3192 { 3193 stateid_t *id = (stateid_t *)stateid; 3194 rfs4_state_t *sp; 3195 3196 *spp = NULL; 3197 3198 /* If we are booted as a cluster node, was stateid locally generated? */ 3199 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3200 return (NFS4ERR_STALE_STATEID); 3201 3202 sp = rfs4_findstate(id, find_invalid, lock_fp); 3203 if (sp == NULL) { 3204 return (what_stateid_error(id, OPENID)); 3205 } 3206 3207 if (rfs4_lease_expired(sp->owner->client)) { 3208 if (lock_fp == TRUE) 3209 rfs4_state_rele(sp); 3210 else 3211 rfs4_state_rele_nounlock(sp); 3212 return (NFS4ERR_EXPIRED); 3213 } 3214 3215 *spp = sp; 3216 3217 return (NFS4_OK); 3218 } 3219 3220 nfsstat4 3221 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp, 3222 rfs4_dbsearch_type_t find_invalid) 3223 { 3224 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE)); 3225 } 3226 3227 int 3228 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid) 3229 { 3230 stateid_t *id = (stateid_t *)stateid; 3231 3232 if (rfs4_lease_expired(sp->owner->client)) 3233 return (NFS4_CHECK_STATEID_EXPIRED); 3234 3235 /* Stateid is some time in the future - that's bad */ 3236 if (sp->stateid.bits.chgseq < id->bits.chgseq) 3237 return (NFS4_CHECK_STATEID_BAD); 3238 3239 if (sp->stateid.bits.chgseq == id->bits.chgseq + 1) 3240 return (NFS4_CHECK_STATEID_REPLAY); 3241 3242 /* Stateid is some time in the past - that's old */ 3243 if (sp->stateid.bits.chgseq > id->bits.chgseq) 3244 return (NFS4_CHECK_STATEID_OLD); 3245 3246 /* Caller needs to know about confirmation before closure */ 3247 if (sp->owner->need_confirm) 3248 return (NFS4_CHECK_STATEID_UNCONFIRMED); 3249 3250 if (sp->closed == TRUE) 3251 return (NFS4_CHECK_STATEID_CLOSED); 3252 3253 return (NFS4_CHECK_STATEID_OKAY); 3254 } 3255 3256 int 3257 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid) 3258 { 3259 stateid_t *id = (stateid_t *)stateid; 3260 3261 if (rfs4_lease_expired(lsp->state->owner->client)) 3262 return (NFS4_CHECK_STATEID_EXPIRED); 3263 3264 /* Stateid is some time in the future - that's bad */ 3265 if (lsp->lockid.bits.chgseq < id->bits.chgseq) 3266 return (NFS4_CHECK_STATEID_BAD); 3267 3268 if (lsp->lockid.bits.chgseq == id->bits.chgseq + 1) 3269 return (NFS4_CHECK_STATEID_REPLAY); 3270 3271 /* Stateid is some time in the past - that's old */ 3272 if (lsp->lockid.bits.chgseq > id->bits.chgseq) 3273 return (NFS4_CHECK_STATEID_OLD); 3274 3275 return (NFS4_CHECK_STATEID_OKAY); 3276 } 3277 3278 nfsstat4 3279 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp) 3280 { 3281 stateid_t *id = (stateid_t *)stateid; 3282 rfs4_deleg_state_t *dsp; 3283 3284 *dspp = NULL; 3285 3286 /* If we are booted as a cluster node, was stateid locally generated? */ 3287 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3288 return (NFS4ERR_STALE_STATEID); 3289 3290 dsp = rfs4_finddelegstate(id); 3291 if (dsp == NULL) { 3292 return (what_stateid_error(id, DELEGID)); 3293 } 3294 3295 if (rfs4_lease_expired(dsp->client)) { 3296 rfs4_deleg_state_rele(dsp); 3297 return (NFS4ERR_EXPIRED); 3298 } 3299 3300 *dspp = dsp; 3301 3302 return (NFS4_OK); 3303 } 3304 3305 nfsstat4 3306 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp) 3307 { 3308 stateid_t *id = (stateid_t *)stateid; 3309 rfs4_lo_state_t *lsp; 3310 3311 *lspp = NULL; 3312 3313 /* If we are booted as a cluster node, was stateid locally generated? */ 3314 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3315 return (NFS4ERR_STALE_STATEID); 3316 3317 lsp = rfs4_findlo_state(id, lock_fp); 3318 if (lsp == NULL) { 3319 return (what_stateid_error(id, LOCKID)); 3320 } 3321 3322 if (rfs4_lease_expired(lsp->state->owner->client)) { 3323 rfs4_lo_state_rele(lsp, lock_fp); 3324 return (NFS4ERR_EXPIRED); 3325 } 3326 3327 *lspp = lsp; 3328 3329 return (NFS4_OK); 3330 } 3331 3332 static nfsstat4 3333 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp, 3334 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lospp) 3335 { 3336 rfs4_state_t *sp = NULL; 3337 rfs4_deleg_state_t *dsp = NULL; 3338 rfs4_lo_state_t *losp = NULL; 3339 stateid_t *id; 3340 nfsstat4 status; 3341 3342 *spp = NULL; *dspp = NULL; *lospp = NULL; 3343 3344 id = (stateid_t *)sid; 3345 switch (id->bits.type) { 3346 case OPENID: 3347 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE); 3348 break; 3349 case DELEGID: 3350 status = rfs4_get_deleg_state(sid, &dsp); 3351 break; 3352 case LOCKID: 3353 status = rfs4_get_lo_state(sid, &losp, FALSE); 3354 if (status == NFS4_OK) { 3355 sp = losp->state; 3356 rfs4_dbe_hold(sp->dbe); 3357 } 3358 break; 3359 default: 3360 status = NFS4ERR_BAD_STATEID; 3361 } 3362 3363 if (status == NFS4_OK) { 3364 *spp = sp; 3365 *dspp = dsp; 3366 *lospp = losp; 3367 } 3368 3369 return (status); 3370 } 3371 3372 /* 3373 * Given the I/O mode (FREAD or FWRITE), this checks whether the 3374 * rfs4_state_t struct has access to do this operation and if so 3375 * return NFS4_OK; otherwise the proper NFSv4 error is returned. 3376 */ 3377 nfsstat4 3378 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp) 3379 { 3380 nfsstat4 stat = NFS4_OK; 3381 rfs4_file_t *fp; 3382 bool_t create = FALSE; 3383 3384 rfs4_dbe_lock(sp->dbe); 3385 if (mode == FWRITE) { 3386 if (!(sp->share_access & OPEN4_SHARE_ACCESS_WRITE)) { 3387 stat = NFS4ERR_OPENMODE; 3388 } 3389 } else if (mode == FREAD) { 3390 if (!(sp->share_access & OPEN4_SHARE_ACCESS_READ)) { 3391 /* 3392 * If we have OPENed the file with DENYing access 3393 * to both READ and WRITE then no one else could 3394 * have OPENed the file, hence no conflicting READ 3395 * deny. This check is merely an optimization. 3396 */ 3397 if (sp->share_deny == OPEN4_SHARE_DENY_BOTH) 3398 goto out; 3399 3400 /* Check against file struct's DENY mode */ 3401 fp = rfs4_findfile(vp, NULL, &create); 3402 if (fp != NULL) { 3403 int deny_read = 0; 3404 rfs4_dbe_lock(fp->dbe); 3405 /* 3406 * Check if any other open owner has the file 3407 * OPENed with deny READ. 3408 */ 3409 if (sp->share_deny & OPEN4_SHARE_DENY_READ) 3410 deny_read = 1; 3411 ASSERT(fp->deny_read - deny_read >= 0); 3412 if (fp->deny_read - deny_read > 0) 3413 stat = NFS4ERR_OPENMODE; 3414 rfs4_dbe_unlock(fp->dbe); 3415 rfs4_file_rele(fp); 3416 } 3417 } 3418 } else { 3419 /* Illegal I/O mode */ 3420 stat = NFS4ERR_INVAL; 3421 } 3422 out: 3423 rfs4_dbe_unlock(sp->dbe); 3424 return (stat); 3425 } 3426 3427 /* 3428 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether 3429 * the file is being truncated, return NFS4_OK if allowed or appropriate 3430 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on 3431 * the associated file will be done if the I/O is not consistent with any 3432 * delegation in effect on the file. Should be holding VOP_RWLOCK, either 3433 * as reader or writer as appropriate. rfs4_op_open will acquire the 3434 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad 3435 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the 3436 * deleg parameter, we will return whether a write delegation is held by 3437 * the client associated with this stateid. 3438 * If the server instance associated with the relevant client is in its 3439 * grace period, return NFS4ERR_GRACE. 3440 */ 3441 3442 nfsstat4 3443 rfs4_check_stateid(int mode, vnode_t *vp, 3444 stateid4 *stateid, bool_t trunc, bool_t *deleg, 3445 bool_t do_access, caller_context_t *ct) 3446 { 3447 rfs4_file_t *fp; 3448 bool_t create = FALSE; 3449 rfs4_state_t *sp; 3450 rfs4_deleg_state_t *dsp; 3451 rfs4_lo_state_t *lsp; 3452 stateid_t *id = (stateid_t *)stateid; 3453 nfsstat4 stat = NFS4_OK; 3454 3455 if (ct != NULL) { 3456 ct->cc_sysid = 0; 3457 ct->cc_pid = 0; 3458 ct->cc_caller_id = nfs4_srv_caller_id; 3459 } 3460 3461 if (ISSPECIAL(stateid)) { 3462 fp = rfs4_findfile(vp, NULL, &create); 3463 if (fp == NULL) 3464 return (NFS4_OK); 3465 if (fp->dinfo->dtype == OPEN_DELEGATE_NONE) { 3466 rfs4_file_rele(fp); 3467 return (NFS4_OK); 3468 } 3469 if (mode == FWRITE || 3470 fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 3471 rfs4_recall_deleg(fp, trunc, NULL); 3472 rfs4_file_rele(fp); 3473 return (NFS4ERR_DELAY); 3474 } 3475 rfs4_file_rele(fp); 3476 return (NFS4_OK); 3477 } else { 3478 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp); 3479 if (stat != NFS4_OK) 3480 return (stat); 3481 if (lsp != NULL) { 3482 /* Is associated server instance in its grace period? */ 3483 if (rfs4_clnt_in_grace(lsp->locker->client)) { 3484 rfs4_lo_state_rele(lsp, FALSE); 3485 if (sp != NULL) 3486 rfs4_state_rele_nounlock(sp); 3487 return (NFS4ERR_GRACE); 3488 } 3489 if (id->bits.type == LOCKID) { 3490 /* Seqid in the future? - that's bad */ 3491 if (lsp->lockid.bits.chgseq < 3492 id->bits.chgseq) { 3493 rfs4_lo_state_rele(lsp, FALSE); 3494 if (sp != NULL) 3495 rfs4_state_rele_nounlock(sp); 3496 return (NFS4ERR_BAD_STATEID); 3497 } 3498 /* Seqid in the past? - that's old */ 3499 if (lsp->lockid.bits.chgseq > 3500 id->bits.chgseq) { 3501 rfs4_lo_state_rele(lsp, FALSE); 3502 if (sp != NULL) 3503 rfs4_state_rele_nounlock(sp); 3504 return (NFS4ERR_OLD_STATEID); 3505 } 3506 /* Ensure specified filehandle matches */ 3507 if (lsp->state->finfo->vp != vp) { 3508 rfs4_lo_state_rele(lsp, FALSE); 3509 if (sp != NULL) 3510 rfs4_state_rele_nounlock(sp); 3511 return (NFS4ERR_BAD_STATEID); 3512 } 3513 } 3514 if (ct != NULL) { 3515 ct->cc_sysid = lsp->locker->client->sysidt; 3516 ct->cc_pid = lsp->locker->pid; 3517 } 3518 rfs4_lo_state_rele(lsp, FALSE); 3519 } 3520 3521 /* Stateid provided was an "open" stateid */ 3522 if (sp != NULL) { 3523 /* Is associated server instance in its grace period? */ 3524 if (rfs4_clnt_in_grace(sp->owner->client)) { 3525 rfs4_state_rele_nounlock(sp); 3526 return (NFS4ERR_GRACE); 3527 } 3528 if (id->bits.type == OPENID) { 3529 /* Seqid in the future? - that's bad */ 3530 if (sp->stateid.bits.chgseq < 3531 id->bits.chgseq) { 3532 rfs4_state_rele_nounlock(sp); 3533 return (NFS4ERR_BAD_STATEID); 3534 } 3535 /* Seqid in the past - that's old */ 3536 if (sp->stateid.bits.chgseq > 3537 id->bits.chgseq) { 3538 rfs4_state_rele_nounlock(sp); 3539 return (NFS4ERR_OLD_STATEID); 3540 } 3541 } 3542 /* Ensure specified filehandle matches */ 3543 if (sp->finfo->vp != vp) { 3544 rfs4_state_rele_nounlock(sp); 3545 return (NFS4ERR_BAD_STATEID); 3546 } 3547 3548 if (sp->owner->need_confirm) { 3549 rfs4_state_rele_nounlock(sp); 3550 return (NFS4ERR_BAD_STATEID); 3551 } 3552 3553 if (sp->closed == TRUE) { 3554 rfs4_state_rele_nounlock(sp); 3555 return (NFS4ERR_OLD_STATEID); 3556 } 3557 3558 if (do_access) 3559 stat = rfs4_state_has_access(sp, mode, vp); 3560 else 3561 stat = NFS4_OK; 3562 3563 /* 3564 * Return whether this state has write 3565 * delegation if desired 3566 */ 3567 if (deleg && 3568 (sp->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)) 3569 *deleg = TRUE; 3570 3571 /* 3572 * We got a valid stateid, so we update the 3573 * lease on the client. Ideally we would like 3574 * to do this after the calling op succeeds, 3575 * but for now this will be good 3576 * enough. Callers of this routine are 3577 * currently insulated from the state stuff. 3578 */ 3579 rfs4_update_lease(sp->owner->client); 3580 3581 /* 3582 * If a delegation is present on this file and 3583 * this is a WRITE, then update the lastwrite 3584 * time to indicate that activity is present. 3585 */ 3586 if (sp->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE && 3587 mode == FWRITE) { 3588 sp->finfo->dinfo->time_lastwrite = 3589 gethrestime_sec(); 3590 } 3591 3592 rfs4_state_rele_nounlock(sp); 3593 3594 return (stat); 3595 } 3596 3597 if (dsp != NULL) { 3598 /* Is associated server instance in its grace period? */ 3599 if (rfs4_clnt_in_grace(dsp->client)) { 3600 rfs4_deleg_state_rele(dsp); 3601 return (NFS4ERR_GRACE); 3602 } 3603 if (dsp->delegid.bits.chgseq != id->bits.chgseq) { 3604 rfs4_deleg_state_rele(dsp); 3605 return (NFS4ERR_BAD_STATEID); 3606 } 3607 3608 /* Ensure specified filehandle matches */ 3609 if (dsp->finfo->vp != vp) { 3610 rfs4_deleg_state_rele(dsp); 3611 return (NFS4ERR_BAD_STATEID); 3612 } 3613 /* 3614 * Return whether this state has write 3615 * delegation if desired 3616 */ 3617 if (deleg && 3618 (dsp->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE)) 3619 *deleg = TRUE; 3620 3621 rfs4_update_lease(dsp->client); 3622 3623 /* 3624 * If a delegation is present on this file and 3625 * this is a WRITE, then update the lastwrite 3626 * time to indicate that activity is present. 3627 */ 3628 if (dsp->finfo->dinfo->dtype == OPEN_DELEGATE_WRITE && 3629 mode == FWRITE) { 3630 dsp->finfo->dinfo->time_lastwrite = 3631 gethrestime_sec(); 3632 } 3633 3634 /* 3635 * XXX - what happens if this is a WRITE and the 3636 * delegation type of for READ. 3637 */ 3638 rfs4_deleg_state_rele(dsp); 3639 3640 return (stat); 3641 } 3642 /* 3643 * If we got this far, something bad happened 3644 */ 3645 return (NFS4ERR_BAD_STATEID); 3646 } 3647 } 3648 3649 3650 /* 3651 * This is a special function in that for the file struct provided the 3652 * server wants to remove/close all current state associated with the 3653 * file. The prime use of this would be with OP_REMOVE to force the 3654 * release of state and particularly of file locks. 3655 * 3656 * There is an assumption that there is no delegations outstanding on 3657 * this file at this point. The caller should have waited for those 3658 * to be returned or revoked. 3659 */ 3660 void 3661 rfs4_close_all_state(rfs4_file_t *fp) 3662 { 3663 rfs4_state_t *sp; 3664 3665 rfs4_dbe_lock(fp->dbe); 3666 3667 #ifdef DEBUG 3668 /* only applies when server is handing out delegations */ 3669 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE) 3670 ASSERT(fp->dinfo->hold_grant > 0); 3671 #endif 3672 3673 /* No delegations for this file */ 3674 ASSERT(fp->delegationlist.next == &fp->delegationlist); 3675 3676 /* Make sure that it can not be found */ 3677 rfs4_dbe_invalidate(fp->dbe); 3678 3679 if (fp->vp == NULL) { 3680 rfs4_dbe_unlock(fp->dbe); 3681 return; 3682 } 3683 rfs4_dbe_unlock(fp->dbe); 3684 3685 /* 3686 * Hold as writer to prevent other server threads from 3687 * processing requests related to the file while all state is 3688 * being removed. 3689 */ 3690 rw_enter(&fp->file_rwlock, RW_WRITER); 3691 3692 /* Remove ALL state from the file */ 3693 while (sp = rfs4_findstate_by_file(fp)) { 3694 rfs4_state_close(sp, FALSE, FALSE, CRED()); 3695 rfs4_state_rele_nounlock(sp); 3696 } 3697 3698 /* 3699 * This is only safe since there are no further references to 3700 * the file. 3701 */ 3702 rfs4_dbe_lock(fp->dbe); 3703 if (fp->vp) { 3704 vnode_t *vp = fp->vp; 3705 3706 mutex_enter(&vp->v_lock); 3707 (void) vsd_set(vp, nfs4_srv_vkey, NULL); 3708 mutex_exit(&vp->v_lock); 3709 VN_RELE(vp); 3710 fp->vp = NULL; 3711 } 3712 rfs4_dbe_unlock(fp->dbe); 3713 3714 /* Finally let other references to proceed */ 3715 rw_exit(&fp->file_rwlock); 3716 } 3717 3718 /* 3719 * This function is used as a target for the rfs4_dbe_walk() call 3720 * below. The purpose of this function is to see if the 3721 * lockowner_state refers to a file that resides within the exportinfo 3722 * export. If so, then remove the lock_owner state (file locks and 3723 * share "locks") for this object since the intent is the server is 3724 * unexporting the specified directory. Be sure to invalidate the 3725 * object after the state has been released 3726 */ 3727 static void 3728 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e) 3729 { 3730 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 3731 struct exportinfo *exi = (struct exportinfo *)e; 3732 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 3733 fhandle_t *efhp; 3734 3735 efhp = (fhandle_t *)&exi->exi_fh; 3736 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 3737 3738 FH_TO_FMT4(efhp, exi_fhp); 3739 3740 finfo_fhp = 3741 (nfs_fh4_fmt_t *)lsp->state->finfo->filehandle.nfs_fh4_val; 3742 3743 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 3744 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 3745 exi_fhp->fh4_xlen) == 0) { 3746 rfs4_state_close(lsp->state, FALSE, FALSE, CRED()); 3747 rfs4_dbe_invalidate(lsp->dbe); 3748 rfs4_dbe_invalidate(lsp->state->dbe); 3749 } 3750 } 3751 3752 /* 3753 * This function is used as a target for the rfs4_dbe_walk() call 3754 * below. The purpose of this function is to see if the state refers 3755 * to a file that resides within the exportinfo export. If so, then 3756 * remove the open state for this object since the intent is the 3757 * server is unexporting the specified directory. The main result for 3758 * this type of entry is to invalidate it such it will not be found in 3759 * the future. 3760 */ 3761 static void 3762 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e) 3763 { 3764 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3765 struct exportinfo *exi = (struct exportinfo *)e; 3766 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 3767 fhandle_t *efhp; 3768 3769 efhp = (fhandle_t *)&exi->exi_fh; 3770 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 3771 3772 FH_TO_FMT4(efhp, exi_fhp); 3773 3774 finfo_fhp = 3775 (nfs_fh4_fmt_t *)sp->finfo->filehandle.nfs_fh4_val; 3776 3777 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 3778 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 3779 exi_fhp->fh4_xlen) == 0) { 3780 rfs4_state_close(sp, TRUE, FALSE, CRED()); 3781 rfs4_dbe_invalidate(sp->dbe); 3782 } 3783 } 3784 3785 /* 3786 * This function is used as a target for the rfs4_dbe_walk() call 3787 * below. The purpose of this function is to see if the state refers 3788 * to a file that resides within the exportinfo export. If so, then 3789 * remove the deleg state for this object since the intent is the 3790 * server is unexporting the specified directory. The main result for 3791 * this type of entry is to invalidate it such it will not be found in 3792 * the future. 3793 */ 3794 static void 3795 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e) 3796 { 3797 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 3798 struct exportinfo *exi = (struct exportinfo *)e; 3799 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 3800 fhandle_t *efhp; 3801 3802 efhp = (fhandle_t *)&exi->exi_fh; 3803 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 3804 3805 FH_TO_FMT4(efhp, exi_fhp); 3806 3807 finfo_fhp = 3808 (nfs_fh4_fmt_t *)dsp->finfo->filehandle.nfs_fh4_val; 3809 3810 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 3811 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 3812 exi_fhp->fh4_xlen) == 0) { 3813 rfs4_dbe_invalidate(dsp->dbe); 3814 } 3815 } 3816 3817 /* 3818 * This function is used as a target for the rfs4_dbe_walk() call 3819 * below. The purpose of this function is to see if the state refers 3820 * to a file that resides within the exportinfo export. If so, then 3821 * release vnode hold for this object since the intent is the server 3822 * is unexporting the specified directory. Invalidation will prevent 3823 * this struct from being found in the future. 3824 */ 3825 static void 3826 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e) 3827 { 3828 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 3829 struct exportinfo *exi = (struct exportinfo *)e; 3830 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 3831 fhandle_t *efhp; 3832 3833 efhp = (fhandle_t *)&exi->exi_fh; 3834 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 3835 3836 FH_TO_FMT4(efhp, exi_fhp); 3837 3838 finfo_fhp = (nfs_fh4_fmt_t *)fp->filehandle.nfs_fh4_val; 3839 3840 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 3841 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 3842 exi_fhp->fh4_xlen) == 0) { 3843 if (fp->vp) { 3844 vnode_t *vp = fp->vp; 3845 3846 /* 3847 * don't leak monitors and remove the reference 3848 * put on the vnode when the delegation was granted. 3849 */ 3850 if (fp->dinfo->dtype == OPEN_DELEGATE_READ) { 3851 (void) fem_uninstall(vp, deleg_rdops, 3852 (void *)fp); 3853 vn_open_downgrade(vp, FREAD); 3854 } else if (fp->dinfo->dtype == OPEN_DELEGATE_WRITE) { 3855 (void) fem_uninstall(vp, deleg_wrops, 3856 (void *)fp); 3857 vn_open_downgrade(vp, FREAD|FWRITE); 3858 } 3859 mutex_enter(&vp->v_lock); 3860 (void) vsd_set(vp, nfs4_srv_vkey, NULL); 3861 mutex_exit(&vp->v_lock); 3862 VN_RELE(vp); 3863 fp->vp = NULL; 3864 } 3865 rfs4_dbe_invalidate(fp->dbe); 3866 } 3867 } 3868 3869 /* 3870 * Given a directory that is being unexported, cleanup/release all 3871 * state in the server that refers to objects residing underneath this 3872 * particular export. The ordering of the release is important. 3873 * Lock_owner, then state and then file. 3874 */ 3875 void 3876 rfs4_clean_state_exi(struct exportinfo *exi) 3877 { 3878 mutex_enter(&rfs4_state_lock); 3879 3880 if (rfs4_server_state == NULL) { 3881 mutex_exit(&rfs4_state_lock); 3882 return; 3883 } 3884 3885 rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi); 3886 rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi); 3887 rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi); 3888 rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi); 3889 3890 mutex_exit(&rfs4_state_lock); 3891 } 3892