1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2018 Nexenta Systems, Inc. 28 * Copyright 2019 Nexenta by DDN, Inc. 
29 */ 30 31 #include <sys/systm.h> 32 #include <sys/kmem.h> 33 #include <sys/cmn_err.h> 34 #include <sys/atomic.h> 35 #include <sys/clconf.h> 36 #include <sys/cladm.h> 37 #include <sys/flock.h> 38 #include <nfs/export.h> 39 #include <nfs/nfs.h> 40 #include <nfs/nfs4.h> 41 #include <nfs/nfssys.h> 42 #include <nfs/lm.h> 43 #include <sys/pathname.h> 44 #include <sys/sdt.h> 45 #include <sys/nvpair.h> 46 47 extern u_longlong_t nfs4_srv_caller_id; 48 49 extern uint_t nfs4_srv_vkey; 50 51 stateid4 special0 = { 52 0, 53 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } 54 }; 55 56 stateid4 special1 = { 57 0xffffffff, 58 { 59 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 60 (char)0xff, (char)0xff, (char)0xff, (char)0xff, 61 (char)0xff, (char)0xff, (char)0xff, (char)0xff 62 } 63 }; 64 65 66 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \ 67 stateid4_cmp(id, &special1)) 68 69 /* For embedding the cluster nodeid into our clientid */ 70 #define CLUSTER_NODEID_SHIFT 24 71 #define CLUSTER_MAX_NODEID 255 72 73 #ifdef DEBUG 74 int rfs4_debug; 75 #endif 76 77 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM]; 78 static uint32_t rfs4_database_debug = 0x00; 79 80 /* CSTYLED */ 81 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf); 82 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf); 83 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip); 84 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip); 85 86 /* 87 * Couple of simple init/destroy functions for a general waiter 88 */ 89 void 90 rfs4_sw_init(rfs4_state_wait_t *swp) 91 { 92 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL); 93 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL); 94 swp->sw_active = FALSE; 95 swp->sw_wait_count = 0; 96 } 97 98 void 99 rfs4_sw_destroy(rfs4_state_wait_t *swp) 100 { 101 mutex_destroy(swp->sw_cv_lock); 102 cv_destroy(swp->sw_cv); 103 } 104 105 void 106 rfs4_sw_enter(rfs4_state_wait_t *swp) 107 { 108 
mutex_enter(swp->sw_cv_lock); 109 while (swp->sw_active) { 110 swp->sw_wait_count++; 111 cv_wait(swp->sw_cv, swp->sw_cv_lock); 112 swp->sw_wait_count--; 113 } 114 ASSERT(swp->sw_active == FALSE); 115 swp->sw_active = TRUE; 116 mutex_exit(swp->sw_cv_lock); 117 } 118 119 void 120 rfs4_sw_exit(rfs4_state_wait_t *swp) 121 { 122 mutex_enter(swp->sw_cv_lock); 123 ASSERT(swp->sw_active == TRUE); 124 swp->sw_active = FALSE; 125 if (swp->sw_wait_count != 0) 126 cv_broadcast(swp->sw_cv); 127 mutex_exit(swp->sw_cv_lock); 128 } 129 130 static void 131 deep_lock_copy(LOCK4res *dres, LOCK4res *sres) 132 { 133 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner; 134 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner; 135 136 if (sres->status == NFS4ERR_DENIED) { 137 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP); 138 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len); 139 } 140 } 141 142 /* 143 * CPR callback id -- not related to v4 callbacks 144 */ 145 static callb_id_t cpr_id = 0; 146 147 static void 148 deep_lock_free(LOCK4res *res) 149 { 150 lock_owner4 *lo = &res->LOCK4res_u.denied.owner; 151 152 if (res->status == NFS4ERR_DENIED) 153 kmem_free(lo->owner_val, lo->owner_len); 154 } 155 156 static void 157 deep_open_copy(OPEN4res *dres, OPEN4res *sres) 158 { 159 nfsace4 *sacep, *dacep; 160 161 if (sres->status != NFS4_OK) { 162 return; 163 } 164 165 dres->attrset = sres->attrset; 166 167 switch (sres->delegation.delegation_type) { 168 case OPEN_DELEGATE_NONE: 169 return; 170 case OPEN_DELEGATE_READ: 171 sacep = &sres->delegation.open_delegation4_u.read.permissions; 172 dacep = &dres->delegation.open_delegation4_u.read.permissions; 173 break; 174 case OPEN_DELEGATE_WRITE: 175 sacep = &sres->delegation.open_delegation4_u.write.permissions; 176 dacep = &dres->delegation.open_delegation4_u.write.permissions; 177 break; 178 } 179 dacep->who.utf8string_val = 180 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP); 181 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val, 
182 sacep->who.utf8string_len); 183 } 184 185 static void 186 deep_open_free(OPEN4res *res) 187 { 188 nfsace4 *acep; 189 if (res->status != NFS4_OK) 190 return; 191 192 switch (res->delegation.delegation_type) { 193 case OPEN_DELEGATE_NONE: 194 return; 195 case OPEN_DELEGATE_READ: 196 acep = &res->delegation.open_delegation4_u.read.permissions; 197 break; 198 case OPEN_DELEGATE_WRITE: 199 acep = &res->delegation.open_delegation4_u.write.permissions; 200 break; 201 } 202 203 if (acep->who.utf8string_val) { 204 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len); 205 acep->who.utf8string_val = NULL; 206 } 207 } 208 209 void 210 rfs4_free_reply(nfs_resop4 *rp) 211 { 212 switch (rp->resop) { 213 case OP_LOCK: 214 deep_lock_free(&rp->nfs_resop4_u.oplock); 215 break; 216 case OP_OPEN: 217 deep_open_free(&rp->nfs_resop4_u.opopen); 218 default: 219 break; 220 } 221 } 222 223 void 224 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src) 225 { 226 *dst = *src; 227 228 /* Handle responses that need deep copy */ 229 switch (src->resop) { 230 case OP_LOCK: 231 deep_lock_copy(&dst->nfs_resop4_u.oplock, 232 &src->nfs_resop4_u.oplock); 233 break; 234 case OP_OPEN: 235 deep_open_copy(&dst->nfs_resop4_u.opopen, 236 &src->nfs_resop4_u.opopen); 237 break; 238 default: 239 break; 240 }; 241 } 242 243 /* 244 * This is the implementation of the underlying state engine. The 245 * public interface to this engine is described by 246 * nfs4_state.h. Callers to the engine should hold no state engine 247 * locks when they call in to it. If the protocol needs to lock data 248 * structures it should do so after acquiring all references to them 249 * first and then follow the following lock order: 250 * 251 * client > openowner > state > lo_state > lockowner > file. 252 * 253 * Internally we only allow a thread to hold one hash bucket lock at a 254 * time and the lock is higher in the lock order (must be acquired 255 * first) than the data structure that is on that hash list. 
256 * 257 * If a new reference was acquired by the caller, that reference needs 258 * to be released after releasing all acquired locks with the 259 * corresponding rfs4_*_rele routine. 260 */ 261 262 /* 263 * This code is some what prototypical for now. Its purpose currently is to 264 * implement the interfaces sufficiently to finish the higher protocol 265 * elements. This will be replaced by a dynamically resizeable tables 266 * backed by kmem_cache allocator. However synchronization is handled 267 * correctly (I hope) and will not change by much. The mutexes for 268 * the hash buckets that can be used to create new instances of data 269 * structures might be good candidates to evolve into reader writer 270 * locks. If it has to do a creation, it would be holding the 271 * mutex across a kmem_alloc with KM_SLEEP specified. 272 */ 273 274 #ifdef DEBUG 275 #define TABSIZE 17 276 #else 277 #define TABSIZE 2047 278 #endif 279 280 #define ADDRHASH(key) ((unsigned long)(key) >> 3) 281 282 #define MAXTABSZ 1024*1024 283 284 /* The values below are rfs4_lease_time units */ 285 286 #ifdef DEBUG 287 #define CLIENT_CACHE_TIME 1 288 #define OPENOWNER_CACHE_TIME 1 289 #define STATE_CACHE_TIME 1 290 #define LO_STATE_CACHE_TIME 1 291 #define LOCKOWNER_CACHE_TIME 1 292 #define FILE_CACHE_TIME 3 293 #define DELEG_STATE_CACHE_TIME 1 294 #else 295 #define CLIENT_CACHE_TIME 10 296 #define OPENOWNER_CACHE_TIME 5 297 #define STATE_CACHE_TIME 1 298 #define LO_STATE_CACHE_TIME 1 299 #define LOCKOWNER_CACHE_TIME 3 300 #define FILE_CACHE_TIME 40 301 #define DELEG_STATE_CACHE_TIME 1 302 #endif 303 304 /* 305 * NFSv4 server state databases 306 * 307 * Initilized when the module is loaded and used by NFSv4 state tables. 308 * These kmem_cache databases are global, the tables that make use of these 309 * are per zone. 
310 */ 311 kmem_cache_t *rfs4_client_mem_cache; 312 kmem_cache_t *rfs4_clntIP_mem_cache; 313 kmem_cache_t *rfs4_openown_mem_cache; 314 kmem_cache_t *rfs4_openstID_mem_cache; 315 kmem_cache_t *rfs4_lockstID_mem_cache; 316 kmem_cache_t *rfs4_lockown_mem_cache; 317 kmem_cache_t *rfs4_file_mem_cache; 318 kmem_cache_t *rfs4_delegstID_mem_cache; 319 320 /* 321 * NFSv4 state table functions 322 */ 323 static bool_t rfs4_client_create(rfs4_entry_t, void *); 324 static void rfs4_dss_remove_cpleaf(rfs4_client_t *); 325 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *); 326 static void rfs4_client_destroy(rfs4_entry_t); 327 static bool_t rfs4_client_expiry(rfs4_entry_t); 328 static uint32_t clientid_hash(void *); 329 static bool_t clientid_compare(rfs4_entry_t, void *); 330 static void *clientid_mkkey(rfs4_entry_t); 331 static uint32_t nfsclnt_hash(void *); 332 static bool_t nfsclnt_compare(rfs4_entry_t, void *); 333 static void *nfsclnt_mkkey(rfs4_entry_t); 334 static bool_t rfs4_clntip_expiry(rfs4_entry_t); 335 static void rfs4_clntip_destroy(rfs4_entry_t); 336 static bool_t rfs4_clntip_create(rfs4_entry_t, void *); 337 static uint32_t clntip_hash(void *); 338 static bool_t clntip_compare(rfs4_entry_t, void *); 339 static void *clntip_mkkey(rfs4_entry_t); 340 static bool_t rfs4_openowner_create(rfs4_entry_t, void *); 341 static void rfs4_openowner_destroy(rfs4_entry_t); 342 static bool_t rfs4_openowner_expiry(rfs4_entry_t); 343 static uint32_t openowner_hash(void *); 344 static bool_t openowner_compare(rfs4_entry_t, void *); 345 static void *openowner_mkkey(rfs4_entry_t); 346 static bool_t rfs4_state_create(rfs4_entry_t, void *); 347 static void rfs4_state_destroy(rfs4_entry_t); 348 static bool_t rfs4_state_expiry(rfs4_entry_t); 349 static uint32_t state_hash(void *); 350 static bool_t state_compare(rfs4_entry_t, void *); 351 static void *state_mkkey(rfs4_entry_t); 352 static uint32_t state_owner_file_hash(void *); 353 static bool_t 
state_owner_file_compare(rfs4_entry_t, void *); 354 static void *state_owner_file_mkkey(rfs4_entry_t); 355 static uint32_t state_file_hash(void *); 356 static bool_t state_file_compare(rfs4_entry_t, void *); 357 static void *state_file_mkkey(rfs4_entry_t); 358 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *); 359 static void rfs4_lo_state_destroy(rfs4_entry_t); 360 static bool_t rfs4_lo_state_expiry(rfs4_entry_t); 361 static uint32_t lo_state_hash(void *); 362 static bool_t lo_state_compare(rfs4_entry_t, void *); 363 static void *lo_state_mkkey(rfs4_entry_t); 364 static uint32_t lo_state_lo_hash(void *); 365 static bool_t lo_state_lo_compare(rfs4_entry_t, void *); 366 static void *lo_state_lo_mkkey(rfs4_entry_t); 367 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *); 368 static void rfs4_lockowner_destroy(rfs4_entry_t); 369 static bool_t rfs4_lockowner_expiry(rfs4_entry_t); 370 static uint32_t lockowner_hash(void *); 371 static bool_t lockowner_compare(rfs4_entry_t, void *); 372 static void *lockowner_mkkey(rfs4_entry_t); 373 static uint32_t pid_hash(void *); 374 static bool_t pid_compare(rfs4_entry_t, void *); 375 static void *pid_mkkey(rfs4_entry_t); 376 static bool_t rfs4_file_create(rfs4_entry_t, void *); 377 static void rfs4_file_destroy(rfs4_entry_t); 378 static uint32_t file_hash(void *); 379 static bool_t file_compare(rfs4_entry_t, void *); 380 static void *file_mkkey(rfs4_entry_t); 381 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *); 382 static void rfs4_deleg_state_destroy(rfs4_entry_t); 383 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t); 384 static uint32_t deleg_hash(void *); 385 static bool_t deleg_compare(rfs4_entry_t, void *); 386 static void *deleg_mkkey(rfs4_entry_t); 387 static uint32_t deleg_state_hash(void *); 388 static bool_t deleg_state_compare(rfs4_entry_t, void *); 389 static void *deleg_state_mkkey(rfs4_entry_t); 390 391 static void rfs4_state_rele_nounlock(rfs4_state_t *); 392 393 static int 
rfs4_ss_enabled = 0; 394 395 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *); 396 397 void 398 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn) 399 { 400 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t)); 401 } 402 403 static rfs4_ss_pn_t * 404 rfs4_ss_pnalloc(char *dir, char *leaf) 405 { 406 rfs4_ss_pn_t *ss_pn; 407 int dir_len, leaf_len; 408 409 /* 410 * validate we have a resonable path 411 * (account for the '/' and trailing null) 412 */ 413 if ((dir_len = strlen(dir)) > MAXPATHLEN || 414 (leaf_len = strlen(leaf)) > MAXNAMELEN || 415 (dir_len + leaf_len + 2) > MAXPATHLEN) { 416 return (NULL); 417 } 418 419 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP); 420 421 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf); 422 /* Handy pointer to just the leaf name */ 423 ss_pn->leaf = ss_pn->pn + dir_len + 1; 424 return (ss_pn); 425 } 426 427 428 /* 429 * Move the "leaf" filename from "sdir" directory 430 * to the "ddir" directory. Return the pathname of 431 * the destination unless the rename fails in which 432 * case we need to return the source pathname. 433 */ 434 static rfs4_ss_pn_t * 435 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf) 436 { 437 rfs4_ss_pn_t *src, *dst; 438 439 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL) 440 return (NULL); 441 442 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) { 443 rfs4_ss_pnfree(src); 444 return (NULL); 445 } 446 447 /* 448 * If the rename fails we shall return the src 449 * pathname and free the dst. Otherwise we need 450 * to free the src and return the dst pathanme. 
451 */ 452 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) { 453 rfs4_ss_pnfree(dst); 454 return (src); 455 } 456 rfs4_ss_pnfree(src); 457 return (dst); 458 } 459 460 461 static rfs4_oldstate_t * 462 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn) 463 { 464 struct uio uio; 465 struct iovec iov[3]; 466 467 rfs4_oldstate_t *cl_ss = NULL; 468 vnode_t *vp; 469 vattr_t va; 470 uint_t id_len; 471 int err, kill_file, file_vers; 472 473 if (ss_pn == NULL) 474 return (NULL); 475 476 /* 477 * open the state file. 478 */ 479 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) { 480 return (NULL); 481 } 482 483 if (vp->v_type != VREG) { 484 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 485 VN_RELE(vp); 486 return (NULL); 487 } 488 489 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL); 490 if (err) { 491 /* 492 * We don't have read access? better get the heck out. 493 */ 494 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 495 VN_RELE(vp); 496 return (NULL); 497 } 498 499 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 500 /* 501 * get the file size to do some basic validation 502 */ 503 va.va_mask = AT_SIZE; 504 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL); 505 506 kill_file = (va.va_size == 0 || va.va_size < 507 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1)); 508 509 if (err || kill_file) { 510 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 511 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 512 VN_RELE(vp); 513 if (kill_file) { 514 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0); 515 } 516 return (NULL); 517 } 518 519 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP); 520 521 /* 522 * build iovecs to read in the file_version, verifier and id_len 523 */ 524 iov[0].iov_base = (caddr_t)&file_vers; 525 iov[0].iov_len = sizeof (int); 526 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier; 527 iov[1].iov_len = NFS4_VERIFIER_SIZE; 528 iov[2].iov_base = (caddr_t)&id_len; 529 iov[2].iov_len = sizeof (uint_t); 530 531 uio.uio_iov = iov; 532 
uio.uio_iovcnt = 3; 533 uio.uio_segflg = UIO_SYSSPACE; 534 uio.uio_loffset = 0; 535 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t); 536 537 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) { 538 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 539 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 540 VN_RELE(vp); 541 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 542 return (NULL); 543 } 544 545 /* 546 * if the file_version doesn't match or if the 547 * id_len is zero or the combination of the verifier, 548 * id_len and id_val is bigger than the file we have 549 * a problem. If so ditch the file. 550 */ 551 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 || 552 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size); 553 554 if (err || kill_file) { 555 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 556 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 557 VN_RELE(vp); 558 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 559 if (kill_file) { 560 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0); 561 } 562 return (NULL); 563 } 564 565 /* 566 * now get the client id value 567 */ 568 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP); 569 iov[0].iov_base = cl_ss->cl_id4.id_val; 570 iov[0].iov_len = id_len; 571 572 uio.uio_iov = iov; 573 uio.uio_iovcnt = 1; 574 uio.uio_segflg = UIO_SYSSPACE; 575 uio.uio_resid = cl_ss->cl_id4.id_len = id_len; 576 577 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) { 578 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 579 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 580 VN_RELE(vp); 581 kmem_free(cl_ss->cl_id4.id_val, id_len); 582 kmem_free(cl_ss, sizeof (rfs4_oldstate_t)); 583 return (NULL); 584 } 585 586 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 587 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL); 588 VN_RELE(vp); 589 return (cl_ss); 590 } 591 592 #ifdef nextdp 593 #undef nextdp 594 #endif 595 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen)) 596 597 
/* 598 * Add entries from statedir to supplied oldstate list. 599 * Optionally, move all entries from statedir -> destdir. 600 */ 601 void 602 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir) 603 { 604 rfs4_ss_pn_t *ss_pn; 605 rfs4_oldstate_t *cl_ss = NULL; 606 char *dirt = NULL; 607 int err, dir_eof = 0, size = 0; 608 vnode_t *dvp; 609 struct iovec iov; 610 struct uio uio; 611 struct dirent64 *dep; 612 offset_t dirchunk_offset = 0; 613 614 /* 615 * open the state directory 616 */ 617 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0)) 618 return; 619 620 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL)) 621 goto out; 622 623 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP); 624 625 /* 626 * Get and process the directory entries 627 */ 628 while (!dir_eof) { 629 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL); 630 iov.iov_base = dirt; 631 iov.iov_len = RFS4_SS_DIRSIZE; 632 uio.uio_iov = &iov; 633 uio.uio_iovcnt = 1; 634 uio.uio_segflg = UIO_SYSSPACE; 635 uio.uio_loffset = dirchunk_offset; 636 uio.uio_resid = RFS4_SS_DIRSIZE; 637 638 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0); 639 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL); 640 if (err) 641 goto out; 642 643 size = RFS4_SS_DIRSIZE - uio.uio_resid; 644 645 /* 646 * Process all the directory entries in this 647 * readdir chunk 648 */ 649 for (dep = (struct dirent64 *)dirt; size > 0; 650 dep = nextdp(dep)) { 651 652 size -= dep->d_reclen; 653 dirchunk_offset = dep->d_off; 654 655 /* 656 * Skip '.' and '..' 
657 */ 658 if (NFS_IS_DOTNAME(dep->d_name)) 659 continue; 660 661 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name); 662 if (ss_pn == NULL) 663 continue; 664 665 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) { 666 if (destdir != NULL) { 667 rfs4_ss_pnfree(ss_pn); 668 cl_ss->ss_pn = rfs4_ss_movestate( 669 statedir, destdir, dep->d_name); 670 } else { 671 cl_ss->ss_pn = ss_pn; 672 } 673 insque(cl_ss, oldstate); 674 } else { 675 rfs4_ss_pnfree(ss_pn); 676 } 677 } 678 } 679 680 out: 681 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL); 682 VN_RELE(dvp); 683 if (dirt) 684 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE); 685 } 686 687 static void 688 rfs4_ss_init(nfs4_srv_t *nsrv4) 689 { 690 int npaths = 1; 691 char *default_dss_path = NFS4_DSS_VAR_DIR; 692 693 /* read the default stable storage state */ 694 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path); 695 696 rfs4_ss_enabled = 1; 697 } 698 699 static void 700 rfs4_ss_fini(nfs4_srv_t *nsrv4) 701 { 702 rfs4_servinst_t *sip; 703 704 mutex_enter(&nsrv4->servinst_lock); 705 sip = nsrv4->nfs4_cur_servinst; 706 while (sip != NULL) { 707 rfs4_dss_clear_oldstate(sip); 708 sip = sip->next; 709 } 710 mutex_exit(&nsrv4->servinst_lock); 711 } 712 713 /* 714 * Remove all oldstate files referenced by this servinst. 
715 */ 716 static void 717 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip) 718 { 719 rfs4_oldstate_t *os_head, *osp; 720 721 rw_enter(&sip->oldstate_lock, RW_WRITER); 722 os_head = sip->oldstate; 723 724 if (os_head == NULL) { 725 rw_exit(&sip->oldstate_lock); 726 return; 727 } 728 729 /* skip dummy entry */ 730 osp = os_head->next; 731 while (osp != os_head) { 732 char *leaf = osp->ss_pn->leaf; 733 rfs4_oldstate_t *os_next; 734 735 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf); 736 737 if (osp->cl_id4.id_val) 738 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len); 739 rfs4_ss_pnfree(osp->ss_pn); 740 741 os_next = osp->next; 742 remque(osp); 743 kmem_free(osp, sizeof (rfs4_oldstate_t)); 744 osp = os_next; 745 } 746 747 rw_exit(&sip->oldstate_lock); 748 } 749 750 /* 751 * Form the state and oldstate paths, and read in the stable storage files. 752 */ 753 void 754 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths) 755 { 756 int i; 757 char *state, *oldstate; 758 759 state = kmem_alloc(MAXPATHLEN, KM_SLEEP); 760 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP); 761 762 for (i = 0; i < npaths; i++) { 763 char *path = paths[i]; 764 765 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF); 766 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF); 767 768 /* 769 * Populate the current server instance's oldstate list. 770 * 771 * 1. Read stable storage data from old state directory, 772 * leaving its contents alone. 773 * 774 * 2. Read stable storage data from state directory, 775 * and move the latter's contents to old state 776 * directory. 777 */ 778 /* CSTYLED */ 779 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL); 780 /* CSTYLED */ 781 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate); 782 } 783 784 kmem_free(state, MAXPATHLEN); 785 kmem_free(oldstate, MAXPATHLEN); 786 } 787 788 789 /* 790 * Check if we are still in grace and if the client can be 791 * granted permission to perform reclaims. 
792 */ 793 void 794 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) 795 { 796 rfs4_servinst_t *sip; 797 798 /* 799 * It should be sufficient to check the oldstate data for just 800 * this client's instance. However, since our per-instance 801 * client grouping is solely temporal, HA-NFSv4 RG failover 802 * might result in clients of the same RG being partitioned into 803 * separate instances. 804 * 805 * Until the client grouping is improved, we must check the 806 * oldstate data for all instances with an active grace period. 807 * 808 * This also serves as the mechanism to remove stale oldstate data. 809 * The first time we check an instance after its grace period has 810 * expired, the oldstate data should be cleared. 811 * 812 * Start at the current instance, and walk the list backwards 813 * to the first. 814 */ 815 mutex_enter(&nsrv4->servinst_lock); 816 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { 817 rfs4_ss_chkclid_sip(cp, sip); 818 819 /* if the above check found this client, we're done */ 820 if (cp->rc_can_reclaim) 821 break; 822 } 823 mutex_exit(&nsrv4->servinst_lock); 824 } 825 826 static void 827 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip) 828 { 829 rfs4_oldstate_t *osp, *os_head; 830 831 /* short circuit everything if this server instance has no oldstate */ 832 rw_enter(&sip->oldstate_lock, RW_READER); 833 os_head = sip->oldstate; 834 rw_exit(&sip->oldstate_lock); 835 if (os_head == NULL) 836 return; 837 838 /* 839 * If this server instance is no longer in a grace period then 840 * the client won't be able to reclaim. No further need for this 841 * instance's oldstate data, so it can be cleared. 
842 */ 843 if (!rfs4_servinst_in_grace(sip)) 844 return; 845 846 /* this instance is still in grace; search for the clientid */ 847 848 rw_enter(&sip->oldstate_lock, RW_READER); 849 850 os_head = sip->oldstate; 851 /* skip dummy entry */ 852 osp = os_head->next; 853 while (osp != os_head) { 854 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) { 855 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val, 856 osp->cl_id4.id_len) == 0) { 857 cp->rc_can_reclaim = 1; 858 break; 859 } 860 } 861 osp = osp->next; 862 } 863 864 rw_exit(&sip->oldstate_lock); 865 } 866 867 /* 868 * Place client information into stable storage: 1/3. 869 * First, generate the leaf filename, from the client's IP address and 870 * the server-generated short-hand clientid. 871 */ 872 void 873 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp) 874 { 875 const char *kinet_ntop6(uchar_t *, char *, size_t); 876 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN]; 877 struct sockaddr *ca; 878 uchar_t *b; 879 880 if (rfs4_ss_enabled == 0) { 881 return; 882 } 883 884 buf[0] = 0; 885 886 ca = (struct sockaddr *)&cp->rc_addr; 887 888 /* 889 * Convert the caller's IP address to a dotted string 890 */ 891 if (ca->sa_family == AF_INET) { 892 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr; 893 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF, 894 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF); 895 } else if (ca->sa_family == AF_INET6) { 896 struct sockaddr_in6 *sin6; 897 898 sin6 = (struct sockaddr_in6 *)ca; 899 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr, 900 buf, INET6_ADDRSTRLEN); 901 } 902 903 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf, 904 (longlong_t)cp->rc_clientid); 905 rfs4_ss_clid_write(nsrv4, cp, leaf); 906 } 907 908 /* 909 * Place client information into stable storage: 2/3. 910 * DSS: distributed stable storage: the file may need to be written to 911 * multiple directories. 
912 */ 913 static void 914 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf) 915 { 916 rfs4_servinst_t *sip; 917 918 /* 919 * It should be sufficient to write the leaf file to (all) DSS paths 920 * associated with just this client's instance. However, since our 921 * per-instance client grouping is solely temporal, HA-NFSv4 RG 922 * failover might result in us losing DSS data. 923 * 924 * Until the client grouping is improved, we must write the DSS data 925 * to all instances' paths. Start at the current instance, and 926 * walk the list backwards to the first. 927 */ 928 mutex_enter(&nsrv4->servinst_lock); 929 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { 930 int i, npaths = sip->dss_npaths; 931 932 /* write the leaf file to all DSS paths */ 933 for (i = 0; i < npaths; i++) { 934 rfs4_dss_path_t *dss_path = sip->dss_paths[i]; 935 936 /* HA-NFSv4 path might have been failed-away from us */ 937 if (dss_path == NULL) 938 continue; 939 940 rfs4_ss_clid_write_one(cp, dss_path->path, leaf); 941 } 942 } 943 mutex_exit(&nsrv4->servinst_lock); 944 } 945 946 /* 947 * Place client information into stable storage: 3/3. 948 * Write the stable storage data to the requested file. 
949 */ 950 static void 951 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf) 952 { 953 int ioflag; 954 int file_vers = NFS4_SS_VERSION; 955 size_t dirlen; 956 struct uio uio; 957 struct iovec iov[4]; 958 char *dir; 959 rfs4_ss_pn_t *ss_pn; 960 vnode_t *vp; 961 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client); 962 963 /* allow 2 extra bytes for '/' & NUL */ 964 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2; 965 dir = kmem_alloc(dirlen, KM_SLEEP); 966 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF); 967 968 ss_pn = rfs4_ss_pnalloc(dir, leaf); 969 /* rfs4_ss_pnalloc takes its own copy */ 970 kmem_free(dir, dirlen); 971 if (ss_pn == NULL) 972 return; 973 974 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp, 975 CRCREAT, 0)) { 976 rfs4_ss_pnfree(ss_pn); 977 return; 978 } 979 980 /* 981 * We need to record leaf - i.e. the filename - so that we know 982 * what to remove, in the future. However, the dir part of cp->ss_pn 983 * should never be referenced directly, since it's potentially only 984 * one of several paths with this leaf in it. 
985 */ 986 if (cp->rc_ss_pn != NULL) { 987 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) { 988 /* we've already recorded *this* leaf */ 989 rfs4_ss_pnfree(ss_pn); 990 } else { 991 /* replace with this leaf */ 992 rfs4_ss_pnfree(cp->rc_ss_pn); 993 cp->rc_ss_pn = ss_pn; 994 } 995 } else { 996 cp->rc_ss_pn = ss_pn; 997 } 998 999 /* 1000 * Build a scatter list that points to the nfs_client_id4 1001 */ 1002 iov[0].iov_base = (caddr_t)&file_vers; 1003 iov[0].iov_len = sizeof (int); 1004 iov[1].iov_base = (caddr_t)&(cl_id4->verifier); 1005 iov[1].iov_len = NFS4_VERIFIER_SIZE; 1006 iov[2].iov_base = (caddr_t)&(cl_id4->id_len); 1007 iov[2].iov_len = sizeof (uint_t); 1008 iov[3].iov_base = (caddr_t)cl_id4->id_val; 1009 iov[3].iov_len = cl_id4->id_len; 1010 1011 uio.uio_iov = iov; 1012 uio.uio_iovcnt = 4; 1013 uio.uio_loffset = 0; 1014 uio.uio_segflg = UIO_SYSSPACE; 1015 uio.uio_llimit = (rlim64_t)MAXOFFSET_T; 1016 uio.uio_resid = cl_id4->id_len + sizeof (int) + 1017 NFS4_VERIFIER_SIZE + sizeof (uint_t); 1018 1019 ioflag = uio.uio_fmode = (FWRITE|FSYNC); 1020 uio.uio_extflg = UIO_COPY_DEFAULT; 1021 1022 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL); 1023 /* write the full client id to the file. */ 1024 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL); 1025 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL); 1026 1027 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL); 1028 VN_RELE(vp); 1029 } 1030 1031 /* 1032 * DSS: distributed stable storage. 1033 * Unpack the list of paths passed by nfsd. 1034 * Use nvlist_alloc(9F) to manage the data. 1035 * The caller is responsible for allocating and freeing the buffer. 1036 */ 1037 int 1038 rfs4_dss_setpaths(char *buf, size_t buflen) 1039 { 1040 int error; 1041 1042 /* 1043 * If this is a "warm start", i.e. we previously had DSS paths, 1044 * preserve the old paths. 1045 */ 1046 if (rfs4_dss_paths != NULL) { 1047 /* 1048 * Before we lose the ptr, destroy the nvlist and pathnames 1049 * array from the warm start before this one. 
1050 */ 1051 nvlist_free(rfs4_dss_oldpaths); 1052 rfs4_dss_oldpaths = rfs4_dss_paths; 1053 } 1054 1055 /* unpack the buffer into a searchable nvlist */ 1056 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP); 1057 if (error) 1058 return (error); 1059 1060 /* 1061 * Search the nvlist for the pathnames nvpair (which is the only nvpair 1062 * in the list, and record its location. 1063 */ 1064 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME, 1065 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths); 1066 return (error); 1067 } 1068 1069 /* 1070 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here 1071 * to find and mark the client for forced expire. 1072 */ 1073 static void 1074 rfs4_client_scrub(rfs4_entry_t ent, void *arg) 1075 { 1076 rfs4_client_t *cp = (rfs4_client_t *)ent; 1077 struct nfs4clrst_args *clr = arg; 1078 struct sockaddr_in6 *ent_sin6; 1079 struct in6_addr clr_in6; 1080 struct sockaddr_in *ent_sin; 1081 struct in_addr clr_in; 1082 1083 if (clr->addr_type != cp->rc_addr.ss_family) { 1084 return; 1085 } 1086 1087 switch (clr->addr_type) { 1088 1089 case AF_INET6: 1090 /* copyin the address from user space */ 1091 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) { 1092 break; 1093 } 1094 1095 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr; 1096 1097 /* 1098 * now compare, and if equivalent mark entry 1099 * for forced expiration 1100 */ 1101 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) { 1102 cp->rc_forced_expire = 1; 1103 } 1104 break; 1105 1106 case AF_INET: 1107 /* copyin the address from user space */ 1108 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) { 1109 break; 1110 } 1111 1112 ent_sin = (struct sockaddr_in *)&cp->rc_addr; 1113 1114 /* 1115 * now compare, and if equivalent mark entry 1116 * for forced expiration 1117 */ 1118 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) { 1119 cp->rc_forced_expire = 1; 1120 } 1121 break; 1122 1123 default: 1124 /* force this assert to fail */ 1125 ASSERT(clr->addr_type != 
clr->addr_type); 1126 } 1127 } 1128 1129 /* 1130 * This is called from nfssys() in order to clear server state 1131 * for the specified client IP Address. 1132 */ 1133 void 1134 rfs4_clear_client_state(struct nfs4clrst_args *clr) 1135 { 1136 nfs4_srv_t *nsrv4; 1137 nsrv4 = nfs4_get_srv(); 1138 (void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr); 1139 } 1140 1141 /* 1142 * Used to initialize the NFSv4 server's state or database. All of 1143 * the tables are created and timers are set. 1144 */ 1145 void 1146 rfs4_state_g_init() 1147 { 1148 extern boolean_t rfs4_cpr_callb(void *, int); 1149 /* 1150 * Add a CPR callback so that we can update client 1151 * access times to extend the lease after a suspend 1152 * and resume (using the same class as rpcmod/connmgr) 1153 */ 1154 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4"); 1155 1156 /* 1157 * NFSv4 server state databases 1158 * 1159 * Initialized when the module is loaded and used by NFSv4 state 1160 * tables. These kmem_cache free pools are used globally, the NFSv4 1161 * state tables which make use of these kmem_cache free pools are per 1162 * zone. 1163 * 1164 * initialize the global kmem_cache free pools which will be used by 1165 * the NFSv4 state tables. 
1166 */ 1167 /* CSTYLED */ 1168 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0); 1169 /* CSTYLED */ 1170 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1); 1171 /* CSTYLED */ 1172 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2); 1173 /* CSTYLED */ 1174 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3); 1175 /* CSTYLED */ 1176 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4); 1177 /* CSTYLED */ 1178 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5); 1179 /* CSTYLED */ 1180 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6); 1181 /* CSTYLED */ 1182 rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7); 1183 1184 rfs4_client_clrst = rfs4_clear_client_state; 1185 } 1186 1187 1188 /* 1189 * Used at server shutdown to cleanup all of the NFSv4 server's structures 1190 * and other state. 1191 */ 1192 void 1193 rfs4_state_g_fini() 1194 { 1195 int i; 1196 /* 1197 * Cleanup the CPR callback. 
1198 */ 1199 if (cpr_id) 1200 (void) callb_delete(cpr_id); 1201 1202 rfs4_client_clrst = NULL; 1203 1204 /* free the NFSv4 state databases */ 1205 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) { 1206 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache); 1207 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL; 1208 } 1209 1210 rfs4_client_mem_cache = NULL; 1211 rfs4_clntIP_mem_cache = NULL; 1212 rfs4_openown_mem_cache = NULL; 1213 rfs4_openstID_mem_cache = NULL; 1214 rfs4_lockstID_mem_cache = NULL; 1215 rfs4_lockown_mem_cache = NULL; 1216 rfs4_file_mem_cache = NULL; 1217 rfs4_delegstID_mem_cache = NULL; 1218 1219 /* DSS: distributed stable storage */ 1220 nvlist_free(rfs4_dss_oldpaths); 1221 nvlist_free(rfs4_dss_paths); 1222 rfs4_dss_paths = rfs4_dss_oldpaths = NULL; 1223 } 1224 1225 /* 1226 * Used to initialize the per zone NFSv4 server's state 1227 */ 1228 void 1229 rfs4_state_zone_init(nfs4_srv_t *nsrv4) 1230 { 1231 time_t start_time; 1232 int start_grace; 1233 char *dss_path = NFS4_DSS_VAR_DIR; 1234 1235 /* DSS: distributed stable storage: initialise served paths list */ 1236 nsrv4->dss_pathlist = NULL; 1237 1238 /* 1239 * Set the boot time. If the server 1240 * has been restarted quickly and has had the opportunity to 1241 * service clients, then the start_time needs to be bumped 1242 * regardless. A small window but it exists... 1243 */ 1244 start_time = gethrestime_sec(); 1245 if (nsrv4->rfs4_start_time < start_time) 1246 nsrv4->rfs4_start_time = start_time; 1247 else 1248 nsrv4->rfs4_start_time++; 1249 1250 /* 1251 * Create the first server instance, or a new one if the server has 1252 * been restarted; see above comments on rfs4_start_time. Don't 1253 * start its grace period; that will be done later, to maximise the 1254 * clients' recovery window. 
1255 */ 1256 start_grace = 0; 1257 if (curzone == global_zone && rfs4_dss_numnewpaths > 0) { 1258 int i; 1259 char **dss_allpaths = NULL; 1260 dss_allpaths = kmem_alloc(sizeof (char *) * 1261 (rfs4_dss_numnewpaths + 1), KM_SLEEP); 1262 /* 1263 * Add the default path into the list of paths for saving 1264 * state informantion. 1265 */ 1266 dss_allpaths[0] = dss_path; 1267 for (i = 0; i < rfs4_dss_numnewpaths; i++) { 1268 dss_allpaths[i + 1] = rfs4_dss_newpaths[i]; 1269 } 1270 rfs4_servinst_create(nsrv4, start_grace, 1271 (rfs4_dss_numnewpaths + 1), dss_allpaths); 1272 kmem_free(dss_allpaths, 1273 (sizeof (char *) * (rfs4_dss_numnewpaths + 1))); 1274 } else { 1275 rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path); 1276 } 1277 1278 /* reset the "first NFSv4 request" status */ 1279 nsrv4->seen_first_compound = 0; 1280 1281 mutex_enter(&nsrv4->state_lock); 1282 1283 /* 1284 * If the server state database has already been initialized, 1285 * skip it 1286 */ 1287 if (nsrv4->nfs4_server_state != NULL) { 1288 mutex_exit(&nsrv4->state_lock); 1289 return; 1290 } 1291 1292 rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL); 1293 1294 /* set the various cache timers for table creation */ 1295 if (nsrv4->rfs4_client_cache_time == 0) 1296 nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME; 1297 if (nsrv4->rfs4_openowner_cache_time == 0) 1298 nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME; 1299 if (nsrv4->rfs4_state_cache_time == 0) 1300 nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME; 1301 if (nsrv4->rfs4_lo_state_cache_time == 0) 1302 nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME; 1303 if (nsrv4->rfs4_lockowner_cache_time == 0) 1304 nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME; 1305 if (nsrv4->rfs4_file_cache_time == 0) 1306 nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME; 1307 if (nsrv4->rfs4_deleg_state_cache_time == 0) 1308 nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME; 1309 1310 /* Create the overall database to hold all 
server state */ 1311 nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug); 1312 1313 /* Now create the individual tables */ 1314 nsrv4->rfs4_client_cache_time *= rfs4_lease_time; 1315 nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1316 "Client", 1317 nsrv4->rfs4_client_cache_time, 1318 2, 1319 rfs4_client_create, 1320 rfs4_client_destroy, 1321 rfs4_client_expiry, 1322 sizeof (rfs4_client_t), 1323 TABSIZE, 1324 MAXTABSZ/8, 100); 1325 nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab, 1326 "nfs_client_id4", nfsclnt_hash, 1327 nfsclnt_compare, nfsclnt_mkkey, 1328 TRUE); 1329 nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab, 1330 "client_id", clientid_hash, 1331 clientid_compare, clientid_mkkey, 1332 FALSE); 1333 1334 nsrv4->rfs4_clntip_cache_time = 86400 * 365; /* about a year */ 1335 nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1336 "ClntIP", 1337 nsrv4->rfs4_clntip_cache_time, 1338 1, 1339 rfs4_clntip_create, 1340 rfs4_clntip_destroy, 1341 rfs4_clntip_expiry, 1342 sizeof (rfs4_clntip_t), 1343 TABSIZE, 1344 MAXTABSZ, 100); 1345 nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab, 1346 "client_ip", clntip_hash, 1347 clntip_compare, clntip_mkkey, 1348 TRUE); 1349 1350 nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time; 1351 nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1352 "OpenOwner", 1353 nsrv4->rfs4_openowner_cache_time, 1354 1, 1355 rfs4_openowner_create, 1356 rfs4_openowner_destroy, 1357 rfs4_openowner_expiry, 1358 sizeof (rfs4_openowner_t), 1359 TABSIZE, 1360 MAXTABSZ, 100); 1361 nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab, 1362 "open_owner4", openowner_hash, 1363 openowner_compare, 1364 openowner_mkkey, TRUE); 1365 1366 nsrv4->rfs4_state_cache_time *= rfs4_lease_time; 1367 nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1368 "OpenStateID", 1369 nsrv4->rfs4_state_cache_time, 
1370 3, 1371 rfs4_state_create, 1372 rfs4_state_destroy, 1373 rfs4_state_expiry, 1374 sizeof (rfs4_state_t), 1375 TABSIZE, 1376 MAXTABSZ, 100); 1377 1378 /* CSTYLED */ 1379 nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab, 1380 "Openowner-File", 1381 state_owner_file_hash, 1382 state_owner_file_compare, 1383 state_owner_file_mkkey, TRUE); 1384 1385 nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab, 1386 "State-id", state_hash, 1387 state_compare, state_mkkey, FALSE); 1388 1389 nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab, 1390 "File", state_file_hash, 1391 state_file_compare, state_file_mkkey, 1392 FALSE); 1393 1394 nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time; 1395 nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1396 "LockStateID", 1397 nsrv4->rfs4_lo_state_cache_time, 1398 2, 1399 rfs4_lo_state_create, 1400 rfs4_lo_state_destroy, 1401 rfs4_lo_state_expiry, 1402 sizeof (rfs4_lo_state_t), 1403 TABSIZE, 1404 MAXTABSZ, 100); 1405 1406 /* CSTYLED */ 1407 nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab, 1408 "lockownerxstate", 1409 lo_state_lo_hash, 1410 lo_state_lo_compare, 1411 lo_state_lo_mkkey, TRUE); 1412 1413 nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab, 1414 "State-id", 1415 lo_state_hash, lo_state_compare, 1416 lo_state_mkkey, FALSE); 1417 1418 nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time; 1419 1420 nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1421 "Lockowner", 1422 nsrv4->rfs4_lockowner_cache_time, 1423 2, 1424 rfs4_lockowner_create, 1425 rfs4_lockowner_destroy, 1426 rfs4_lockowner_expiry, 1427 sizeof (rfs4_lockowner_t), 1428 TABSIZE, 1429 MAXTABSZ, 100); 1430 1431 nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab, 1432 "lock_owner4", lockowner_hash, 1433 lockowner_compare, 1434 lockowner_mkkey, TRUE); 1435 1436 /* CSTYLED */ 1437 nsrv4->rfs4_lockowner_pid_idx = 
rfs4_index_create(nsrv4->rfs4_lockowner_tab, 1438 "pid", pid_hash, 1439 pid_compare, pid_mkkey, 1440 FALSE); 1441 1442 nsrv4->rfs4_file_cache_time *= rfs4_lease_time; 1443 nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1444 "File", 1445 nsrv4->rfs4_file_cache_time, 1446 1, 1447 rfs4_file_create, 1448 rfs4_file_destroy, 1449 NULL, 1450 sizeof (rfs4_file_t), 1451 TABSIZE, 1452 MAXTABSZ, -1); 1453 1454 nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab, 1455 "Filehandle", file_hash, 1456 file_compare, file_mkkey, TRUE); 1457 1458 nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time; 1459 /* CSTYLED */ 1460 nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state, 1461 "DelegStateID", 1462 nsrv4->rfs4_deleg_state_cache_time, 1463 2, 1464 rfs4_deleg_state_create, 1465 rfs4_deleg_state_destroy, 1466 rfs4_deleg_state_expiry, 1467 sizeof (rfs4_deleg_state_t), 1468 TABSIZE, 1469 MAXTABSZ, 100); 1470 nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab, 1471 "DelegByFileClient", 1472 deleg_hash, 1473 deleg_compare, 1474 deleg_mkkey, TRUE); 1475 1476 /* CSTYLED */ 1477 nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab, 1478 "DelegState", 1479 deleg_state_hash, 1480 deleg_state_compare, 1481 deleg_state_mkkey, FALSE); 1482 1483 mutex_exit(&nsrv4->state_lock); 1484 1485 /* 1486 * Init the stable storage. 1487 */ 1488 rfs4_ss_init(nsrv4); 1489 } 1490 1491 /* 1492 * Used at server shutdown to cleanup all of NFSv4 server's zone structures 1493 * and state. 1494 */ 1495 void 1496 rfs4_state_zone_fini() 1497 { 1498 rfs4_database_t *dbp; 1499 nfs4_srv_t *nsrv4; 1500 nsrv4 = nfs4_get_srv(); 1501 1502 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE); 1503 1504 /* 1505 * Clean up any dangling stable storage structures BEFORE calling 1506 * rfs4_servinst_destroy_all() so there are no dangling structures 1507 * (i.e. the srvinsts are all cleared of danglers BEFORE they get 1508 * freed). 
1509 */ 1510 rfs4_ss_fini(nsrv4); 1511 1512 mutex_enter(&nsrv4->state_lock); 1513 1514 if (nsrv4->nfs4_server_state == NULL) { 1515 mutex_exit(&nsrv4->state_lock); 1516 return; 1517 } 1518 1519 /* destroy server instances and current instance ptr */ 1520 rfs4_servinst_destroy_all(nsrv4); 1521 1522 /* reset the "first NFSv4 request" status */ 1523 nsrv4->seen_first_compound = 0; 1524 1525 dbp = nsrv4->nfs4_server_state; 1526 nsrv4->nfs4_server_state = NULL; 1527 1528 rw_destroy(&nsrv4->rfs4_findclient_lock); 1529 1530 /* First stop all of the reaper threads in the database */ 1531 rfs4_database_shutdown(dbp); 1532 1533 /* 1534 * WARNING: There may be consumers of the rfs4 database still 1535 * active as we destroy these. IF that's the case, consider putting 1536 * some of their _zone_fini()-like functions into the zsd key as 1537 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can 1538 * maintain some ordering guarantees better that way. 1539 */ 1540 /* Now destroy/release the database tables */ 1541 rfs4_database_destroy(dbp); 1542 1543 /* Reset the cache timers for next time */ 1544 nsrv4->rfs4_client_cache_time = 0; 1545 nsrv4->rfs4_openowner_cache_time = 0; 1546 nsrv4->rfs4_state_cache_time = 0; 1547 nsrv4->rfs4_lo_state_cache_time = 0; 1548 nsrv4->rfs4_lockowner_cache_time = 0; 1549 nsrv4->rfs4_file_cache_time = 0; 1550 nsrv4->rfs4_deleg_state_cache_time = 0; 1551 1552 mutex_exit(&nsrv4->state_lock); 1553 } 1554 1555 typedef union { 1556 struct { 1557 uint32_t start_time; 1558 uint32_t c_id; 1559 } impl_id; 1560 clientid4 id4; 1561 } cid; 1562 1563 static int foreign_stateid(stateid_t *id); 1564 static int foreign_clientid(cid *cidp); 1565 static void embed_nodeid(cid *cidp); 1566 1567 typedef union { 1568 struct { 1569 uint32_t c_id; 1570 uint32_t gen_num; 1571 } cv_impl; 1572 verifier4 confirm_verf; 1573 } scid_confirm_verf; 1574 1575 static uint32_t 1576 clientid_hash(void *key) 1577 { 1578 cid *idp = key; 1579 1580 return (idp->impl_id.c_id); 
1581 } 1582 1583 static bool_t 1584 clientid_compare(rfs4_entry_t entry, void *key) 1585 { 1586 rfs4_client_t *cp = (rfs4_client_t *)entry; 1587 clientid4 *idp = key; 1588 1589 return (*idp == cp->rc_clientid); 1590 } 1591 1592 static void * 1593 clientid_mkkey(rfs4_entry_t entry) 1594 { 1595 rfs4_client_t *cp = (rfs4_client_t *)entry; 1596 1597 return (&cp->rc_clientid); 1598 } 1599 1600 static uint32_t 1601 nfsclnt_hash(void *key) 1602 { 1603 nfs_client_id4 *client = key; 1604 int i; 1605 uint32_t hash = 0; 1606 1607 for (i = 0; i < client->id_len; i++) { 1608 hash <<= 1; 1609 hash += (uint_t)client->id_val[i]; 1610 } 1611 return (hash); 1612 } 1613 1614 1615 static bool_t 1616 nfsclnt_compare(rfs4_entry_t entry, void *key) 1617 { 1618 rfs4_client_t *cp = (rfs4_client_t *)entry; 1619 nfs_client_id4 *nfs_client = key; 1620 1621 if (cp->rc_nfs_client.id_len != nfs_client->id_len) 1622 return (FALSE); 1623 1624 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val, 1625 nfs_client->id_len) == 0); 1626 } 1627 1628 static void * 1629 nfsclnt_mkkey(rfs4_entry_t entry) 1630 { 1631 rfs4_client_t *cp = (rfs4_client_t *)entry; 1632 1633 return (&cp->rc_nfs_client); 1634 } 1635 1636 static bool_t 1637 rfs4_client_expiry(rfs4_entry_t u_entry) 1638 { 1639 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1640 bool_t cp_expired; 1641 1642 if (rfs4_dbe_is_invalid(cp->rc_dbe)) { 1643 cp->rc_ss_remove = 1; 1644 return (TRUE); 1645 } 1646 /* 1647 * If the sysadmin has used clear_locks for this 1648 * entry then forced_expire will be set and we 1649 * want this entry to be reaped. Or the entry 1650 * has exceeded its lease period. 1651 */ 1652 cp_expired = (cp->rc_forced_expire || 1653 (gethrestime_sec() - cp->rc_last_access 1654 > rfs4_lease_time)); 1655 1656 if (!cp->rc_ss_remove && cp_expired) 1657 cp->rc_ss_remove = 1; 1658 return (cp_expired); 1659 } 1660 1661 /* 1662 * Remove the leaf file from all distributed stable storage paths. 
1663 */ 1664 static void 1665 rfs4_dss_remove_cpleaf(rfs4_client_t *cp) 1666 { 1667 nfs4_srv_t *nsrv4; 1668 rfs4_servinst_t *sip; 1669 char *leaf = cp->rc_ss_pn->leaf; 1670 1671 /* 1672 * since the state files are written to all DSS 1673 * paths we must remove this leaf file instance 1674 * from all server instances. 1675 */ 1676 1677 nsrv4 = nfs4_get_srv(); 1678 mutex_enter(&nsrv4->servinst_lock); 1679 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) { 1680 /* remove the leaf file associated with this server instance */ 1681 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf); 1682 } 1683 mutex_exit(&nsrv4->servinst_lock); 1684 } 1685 1686 static void 1687 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf) 1688 { 1689 int i, npaths = sip->dss_npaths; 1690 1691 for (i = 0; i < npaths; i++) { 1692 rfs4_dss_path_t *dss_path = sip->dss_paths[i]; 1693 char *path, *dir; 1694 size_t pathlen; 1695 1696 /* the HA-NFSv4 path might have been failed-over away from us */ 1697 if (dss_path == NULL) 1698 continue; 1699 1700 dir = dss_path->path; 1701 1702 /* allow 3 extra bytes for two '/' & a NUL */ 1703 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3; 1704 path = kmem_alloc(pathlen, KM_SLEEP); 1705 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf); 1706 1707 (void) vn_remove(path, UIO_SYSSPACE, RMFILE); 1708 1709 kmem_free(path, pathlen); 1710 } 1711 } 1712 1713 static void 1714 rfs4_client_destroy(rfs4_entry_t u_entry) 1715 { 1716 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1717 1718 mutex_destroy(cp->rc_cbinfo.cb_lock); 1719 cv_destroy(cp->rc_cbinfo.cb_cv); 1720 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller); 1721 list_destroy(&cp->rc_openownerlist); 1722 1723 /* free callback info */ 1724 rfs4_cbinfo_free(&cp->rc_cbinfo); 1725 1726 if (cp->rc_cp_confirmed) 1727 rfs4_client_rele(cp->rc_cp_confirmed); 1728 1729 if (cp->rc_ss_pn) { 1730 /* check if the stable storage files need to be removed */ 1731 if (cp->rc_ss_remove) 
1732 rfs4_dss_remove_cpleaf(cp); 1733 rfs4_ss_pnfree(cp->rc_ss_pn); 1734 } 1735 1736 /* Free the client supplied client id */ 1737 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len); 1738 1739 if (cp->rc_sysidt != LM_NOSYSID) 1740 lm_free_sysidt(cp->rc_sysidt); 1741 } 1742 1743 static bool_t 1744 rfs4_client_create(rfs4_entry_t u_entry, void *arg) 1745 { 1746 rfs4_client_t *cp = (rfs4_client_t *)u_entry; 1747 nfs_client_id4 *client = (nfs_client_id4 *)arg; 1748 struct sockaddr *ca; 1749 cid *cidp; 1750 scid_confirm_verf *scvp; 1751 nfs4_srv_t *nsrv4; 1752 1753 nsrv4 = nfs4_get_srv(); 1754 1755 /* Get a clientid to give to the client */ 1756 cidp = (cid *)&cp->rc_clientid; 1757 cidp->impl_id.start_time = nsrv4->rfs4_start_time; 1758 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe); 1759 1760 /* If we are booted as a cluster node, embed our nodeid */ 1761 if (cluster_bootflags & CLUSTER_BOOTED) 1762 embed_nodeid(cidp); 1763 1764 /* Allocate and copy client's client id value */ 1765 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP); 1766 cp->rc_nfs_client.id_len = client->id_len; 1767 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len); 1768 cp->rc_nfs_client.verifier = client->verifier; 1769 1770 /* Copy client's IP address */ 1771 ca = client->cl_addr; 1772 if (ca->sa_family == AF_INET) 1773 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in)); 1774 else if (ca->sa_family == AF_INET6) 1775 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6)); 1776 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr; 1777 1778 /* Init the value for the SETCLIENTID_CONFIRM verifier */ 1779 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf; 1780 scvp->cv_impl.c_id = cidp->impl_id.c_id; 1781 scvp->cv_impl.gen_num = 0; 1782 1783 /* An F_UNLKSYS has been done for this client */ 1784 cp->rc_unlksys_completed = FALSE; 1785 1786 /* We need the client to ack us */ 1787 cp->rc_need_confirm = TRUE; 1788 cp->rc_cp_confirmed = NULL; 1789 1790 
/* TRUE all the time until the callback path actually fails */ 1791 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE; 1792 1793 /* Initialize the access time to now */ 1794 cp->rc_last_access = gethrestime_sec(); 1795 1796 cp->rc_cr_set = NULL; 1797 1798 cp->rc_sysidt = LM_NOSYSID; 1799 1800 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t), 1801 offsetof(rfs4_openowner_t, ro_node)); 1802 1803 /* set up the callback control structure */ 1804 cp->rc_cbinfo.cb_state = CB_UNINIT; 1805 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL); 1806 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL); 1807 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL); 1808 1809 /* 1810 * Associate the client_t with the current server instance. 1811 * The hold is solely to satisfy the calling requirement of 1812 * rfs4_servinst_assign(). In this case it's not strictly necessary. 1813 */ 1814 rfs4_dbe_hold(cp->rc_dbe); 1815 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst); 1816 rfs4_dbe_rele(cp->rc_dbe); 1817 1818 return (TRUE); 1819 } 1820 1821 /* 1822 * Caller wants to generate/update the setclientid_confirm verifier 1823 * associated with a client. This is done during the SETCLIENTID 1824 * processing. 
1825 */ 1826 void 1827 rfs4_client_scv_next(rfs4_client_t *cp) 1828 { 1829 scid_confirm_verf *scvp; 1830 1831 /* Init the value for the SETCLIENTID_CONFIRM verifier */ 1832 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf; 1833 scvp->cv_impl.gen_num++; 1834 } 1835 1836 void 1837 rfs4_client_rele(rfs4_client_t *cp) 1838 { 1839 rfs4_dbe_rele(cp->rc_dbe); 1840 } 1841 1842 rfs4_client_t * 1843 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp) 1844 { 1845 rfs4_client_t *cp; 1846 nfs4_srv_t *nsrv4; 1847 nsrv4 = nfs4_get_srv(); 1848 1849 1850 if (oldcp) { 1851 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER); 1852 rfs4_dbe_hide(oldcp->rc_dbe); 1853 } else { 1854 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 1855 } 1856 1857 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client, 1858 create, (void *)client, RFS4_DBS_VALID); 1859 1860 if (oldcp) 1861 rfs4_dbe_unhide(oldcp->rc_dbe); 1862 1863 rw_exit(&nsrv4->rfs4_findclient_lock); 1864 1865 return (cp); 1866 } 1867 1868 rfs4_client_t * 1869 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed) 1870 { 1871 rfs4_client_t *cp; 1872 bool_t create = FALSE; 1873 cid *cidp = (cid *)&clientid; 1874 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 1875 1876 /* If we're a cluster and the nodeid isn't right, short-circuit */ 1877 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp)) 1878 return (NULL); 1879 1880 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 1881 1882 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid, 1883 &create, NULL, RFS4_DBS_VALID); 1884 1885 rw_exit(&nsrv4->rfs4_findclient_lock); 1886 1887 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) { 1888 rfs4_client_rele(cp); 1889 return (NULL); 1890 } else { 1891 return (cp); 1892 } 1893 } 1894 1895 static uint32_t 1896 clntip_hash(void *key) 1897 { 1898 struct sockaddr *addr = key; 1899 int i, len = 0; 1900 uint32_t hash = 0; 1901 char *ptr; 1902 1903 if (addr->sa_family == 
AF_INET) { 1904 struct sockaddr_in *a = (struct sockaddr_in *)addr; 1905 len = sizeof (struct in_addr); 1906 ptr = (char *)&a->sin_addr; 1907 } else if (addr->sa_family == AF_INET6) { 1908 struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr; 1909 len = sizeof (struct in6_addr); 1910 ptr = (char *)&a->sin6_addr; 1911 } else 1912 return (0); 1913 1914 for (i = 0; i < len; i++) { 1915 hash <<= 1; 1916 hash += (uint_t)ptr[i]; 1917 } 1918 return (hash); 1919 } 1920 1921 static bool_t 1922 clntip_compare(rfs4_entry_t entry, void *key) 1923 { 1924 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry; 1925 struct sockaddr *addr = key; 1926 int len = 0; 1927 char *p1, *p2; 1928 1929 if (addr->sa_family == AF_INET) { 1930 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr; 1931 struct sockaddr_in *a2 = (struct sockaddr_in *)addr; 1932 len = sizeof (struct in_addr); 1933 p1 = (char *)&a1->sin_addr; 1934 p2 = (char *)&a2->sin_addr; 1935 } else if (addr->sa_family == AF_INET6) { 1936 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr; 1937 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr; 1938 len = sizeof (struct in6_addr); 1939 p1 = (char *)&a1->sin6_addr; 1940 p2 = (char *)&a2->sin6_addr; 1941 } else 1942 return (0); 1943 1944 return (bcmp(p1, p2, len) == 0); 1945 } 1946 1947 static void * 1948 clntip_mkkey(rfs4_entry_t entry) 1949 { 1950 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry; 1951 1952 return (&cp->ri_addr); 1953 } 1954 1955 static bool_t 1956 rfs4_clntip_expiry(rfs4_entry_t u_entry) 1957 { 1958 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry; 1959 1960 if (rfs4_dbe_is_invalid(cp->ri_dbe)) 1961 return (TRUE); 1962 return (FALSE); 1963 } 1964 1965 /* ARGSUSED */ 1966 static void 1967 rfs4_clntip_destroy(rfs4_entry_t u_entry) 1968 { 1969 } 1970 1971 static bool_t 1972 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg) 1973 { 1974 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry; 1975 struct sockaddr *ca = (struct sockaddr *)arg; 1976 1977 /* Copy client's IP 
address */ 1978 if (ca->sa_family == AF_INET) 1979 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in)); 1980 else if (ca->sa_family == AF_INET6) 1981 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6)); 1982 else 1983 return (FALSE); 1984 cp->ri_no_referrals = 1; 1985 1986 return (TRUE); 1987 } 1988 1989 rfs4_clntip_t * 1990 rfs4_find_clntip(struct sockaddr *addr, bool_t *create) 1991 { 1992 rfs4_clntip_t *cp; 1993 nfs4_srv_t *nsrv4; 1994 1995 nsrv4 = nfs4_get_srv(); 1996 1997 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 1998 1999 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr, 2000 create, addr, RFS4_DBS_VALID); 2001 2002 rw_exit(&nsrv4->rfs4_findclient_lock); 2003 2004 return (cp); 2005 } 2006 2007 void 2008 rfs4_invalidate_clntip(struct sockaddr *addr) 2009 { 2010 rfs4_clntip_t *cp; 2011 bool_t create = FALSE; 2012 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2013 2014 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 2015 2016 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr, 2017 &create, NULL, RFS4_DBS_VALID); 2018 if (cp == NULL) { 2019 rw_exit(&nsrv4->rfs4_findclient_lock); 2020 return; 2021 } 2022 rfs4_dbe_invalidate(cp->ri_dbe); 2023 rfs4_dbe_rele(cp->ri_dbe); 2024 2025 rw_exit(&nsrv4->rfs4_findclient_lock); 2026 } 2027 2028 bool_t 2029 rfs4_lease_expired(rfs4_client_t *cp) 2030 { 2031 bool_t rc; 2032 2033 rfs4_dbe_lock(cp->rc_dbe); 2034 2035 /* 2036 * If the admin has executed clear_locks for this 2037 * client id, force expire will be set, so no need 2038 * to calculate anything because it's "outa here". 2039 */ 2040 if (cp->rc_forced_expire) { 2041 rc = TRUE; 2042 } else { 2043 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time); 2044 } 2045 2046 /* 2047 * If the lease has expired we will also want 2048 * to remove any stable storage state data. So 2049 * mark the client id accordingly. 
2050 */ 2051 if (!cp->rc_ss_remove) 2052 cp->rc_ss_remove = (rc == TRUE); 2053 2054 rfs4_dbe_unlock(cp->rc_dbe); 2055 2056 return (rc); 2057 } 2058 2059 void 2060 rfs4_update_lease(rfs4_client_t *cp) 2061 { 2062 rfs4_dbe_lock(cp->rc_dbe); 2063 if (!cp->rc_forced_expire) 2064 cp->rc_last_access = gethrestime_sec(); 2065 rfs4_dbe_unlock(cp->rc_dbe); 2066 } 2067 2068 2069 static bool_t 2070 EQOPENOWNER(open_owner4 *a, open_owner4 *b) 2071 { 2072 bool_t rc; 2073 2074 if (a->clientid != b->clientid) 2075 return (FALSE); 2076 2077 if (a->owner_len != b->owner_len) 2078 return (FALSE); 2079 2080 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0); 2081 2082 return (rc); 2083 } 2084 2085 static uint_t 2086 openowner_hash(void *key) 2087 { 2088 int i; 2089 open_owner4 *openowner = key; 2090 uint_t hash = 0; 2091 2092 for (i = 0; i < openowner->owner_len; i++) { 2093 hash <<= 4; 2094 hash += (uint_t)openowner->owner_val[i]; 2095 } 2096 hash += (uint_t)openowner->clientid; 2097 hash |= (openowner->clientid >> 32); 2098 2099 return (hash); 2100 } 2101 2102 static bool_t 2103 openowner_compare(rfs4_entry_t u_entry, void *key) 2104 { 2105 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry; 2106 open_owner4 *arg = key; 2107 2108 return (EQOPENOWNER(&oo->ro_owner, arg)); 2109 } 2110 2111 void * 2112 openowner_mkkey(rfs4_entry_t u_entry) 2113 { 2114 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry; 2115 2116 return (&oo->ro_owner); 2117 } 2118 2119 /* ARGSUSED */ 2120 static bool_t 2121 rfs4_openowner_expiry(rfs4_entry_t u_entry) 2122 { 2123 /* openstateid held us and did all needed delay */ 2124 return (TRUE); 2125 } 2126 2127 static void 2128 rfs4_openowner_destroy(rfs4_entry_t u_entry) 2129 { 2130 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry; 2131 2132 /* Remove open owner from client's lists of open owners */ 2133 rfs4_dbe_lock(oo->ro_client->rc_dbe); 2134 list_remove(&oo->ro_client->rc_openownerlist, oo); 2135 rfs4_dbe_unlock(oo->ro_client->rc_dbe); 2136 2137 /* 
One less reference to the client */ 2138 rfs4_client_rele(oo->ro_client); 2139 oo->ro_client = NULL; 2140 2141 /* Free the last reply for this lock owner */ 2142 rfs4_free_reply(&oo->ro_reply); 2143 2144 if (oo->ro_reply_fh.nfs_fh4_val) { 2145 kmem_free(oo->ro_reply_fh.nfs_fh4_val, 2146 oo->ro_reply_fh.nfs_fh4_len); 2147 oo->ro_reply_fh.nfs_fh4_val = NULL; 2148 oo->ro_reply_fh.nfs_fh4_len = 0; 2149 } 2150 2151 rfs4_sw_destroy(&oo->ro_sw); 2152 list_destroy(&oo->ro_statelist); 2153 2154 /* Free the lock owner id */ 2155 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len); 2156 } 2157 2158 void 2159 rfs4_openowner_rele(rfs4_openowner_t *oo) 2160 { 2161 rfs4_dbe_rele(oo->ro_dbe); 2162 } 2163 2164 static bool_t 2165 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg) 2166 { 2167 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry; 2168 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg; 2169 open_owner4 *openowner = &argp->ro_owner; 2170 seqid4 seqid = argp->ro_open_seqid; 2171 rfs4_client_t *cp; 2172 bool_t create = FALSE; 2173 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2174 2175 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 2176 2177 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, 2178 &openowner->clientid, 2179 &create, NULL, RFS4_DBS_VALID); 2180 2181 rw_exit(&nsrv4->rfs4_findclient_lock); 2182 2183 if (cp == NULL) 2184 return (FALSE); 2185 2186 oo->ro_reply_fh.nfs_fh4_len = 0; 2187 oo->ro_reply_fh.nfs_fh4_val = NULL; 2188 2189 oo->ro_owner.clientid = openowner->clientid; 2190 oo->ro_owner.owner_val = 2191 kmem_alloc(openowner->owner_len, KM_SLEEP); 2192 2193 bcopy(openowner->owner_val, 2194 oo->ro_owner.owner_val, openowner->owner_len); 2195 2196 oo->ro_owner.owner_len = openowner->owner_len; 2197 2198 oo->ro_need_confirm = TRUE; 2199 2200 rfs4_sw_init(&oo->ro_sw); 2201 2202 oo->ro_open_seqid = seqid; 2203 bzero(&oo->ro_reply, sizeof (nfs_resop4)); 2204 oo->ro_client = cp; 2205 oo->ro_cr_set = NULL; 2206 2207 list_create(&oo->ro_statelist, sizeof 
(rfs4_state_t), 2208 offsetof(rfs4_state_t, rs_node)); 2209 2210 /* Insert openowner into client's open owner list */ 2211 rfs4_dbe_lock(cp->rc_dbe); 2212 list_insert_tail(&cp->rc_openownerlist, oo); 2213 rfs4_dbe_unlock(cp->rc_dbe); 2214 2215 return (TRUE); 2216 } 2217 2218 rfs4_openowner_t * 2219 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid) 2220 { 2221 rfs4_openowner_t *oo; 2222 rfs4_openowner_t arg; 2223 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2224 2225 arg.ro_owner = *openowner; 2226 arg.ro_open_seqid = seqid; 2227 /* CSTYLED */ 2228 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner, 2229 create, &arg, RFS4_DBS_VALID); 2230 2231 return (oo); 2232 } 2233 2234 void 2235 rfs4_update_open_sequence(rfs4_openowner_t *oo) 2236 { 2237 2238 rfs4_dbe_lock(oo->ro_dbe); 2239 2240 oo->ro_open_seqid++; 2241 2242 rfs4_dbe_unlock(oo->ro_dbe); 2243 } 2244 2245 void 2246 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh) 2247 { 2248 2249 rfs4_dbe_lock(oo->ro_dbe); 2250 2251 rfs4_free_reply(&oo->ro_reply); 2252 2253 rfs4_copy_reply(&oo->ro_reply, resp); 2254 2255 /* Save the filehandle if provided and free if not used */ 2256 if (resp->nfs_resop4_u.opopen.status == NFS4_OK && 2257 fh && fh->nfs_fh4_len) { 2258 if (oo->ro_reply_fh.nfs_fh4_val == NULL) 2259 oo->ro_reply_fh.nfs_fh4_val = 2260 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP); 2261 nfs_fh4_copy(fh, &oo->ro_reply_fh); 2262 } else { 2263 if (oo->ro_reply_fh.nfs_fh4_val) { 2264 kmem_free(oo->ro_reply_fh.nfs_fh4_val, 2265 oo->ro_reply_fh.nfs_fh4_len); 2266 oo->ro_reply_fh.nfs_fh4_val = NULL; 2267 oo->ro_reply_fh.nfs_fh4_len = 0; 2268 } 2269 } 2270 2271 rfs4_dbe_unlock(oo->ro_dbe); 2272 } 2273 2274 static bool_t 2275 lockowner_compare(rfs4_entry_t u_entry, void *key) 2276 { 2277 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2278 lock_owner4 *b = (lock_owner4 *)key; 2279 2280 if (lo->rl_owner.clientid != b->clientid) 2281 return (FALSE); 2282 2283 if 
(lo->rl_owner.owner_len != b->owner_len) 2284 return (FALSE); 2285 2286 return (bcmp(lo->rl_owner.owner_val, b->owner_val, 2287 lo->rl_owner.owner_len) == 0); 2288 } 2289 2290 void * 2291 lockowner_mkkey(rfs4_entry_t u_entry) 2292 { 2293 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2294 2295 return (&lo->rl_owner); 2296 } 2297 2298 static uint32_t 2299 lockowner_hash(void *key) 2300 { 2301 int i; 2302 lock_owner4 *lockowner = key; 2303 uint_t hash = 0; 2304 2305 for (i = 0; i < lockowner->owner_len; i++) { 2306 hash <<= 4; 2307 hash += (uint_t)lockowner->owner_val[i]; 2308 } 2309 hash += (uint_t)lockowner->clientid; 2310 hash |= (lockowner->clientid >> 32); 2311 2312 return (hash); 2313 } 2314 2315 static uint32_t 2316 pid_hash(void *key) 2317 { 2318 return ((uint32_t)(uintptr_t)key); 2319 } 2320 2321 static void * 2322 pid_mkkey(rfs4_entry_t u_entry) 2323 { 2324 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2325 2326 return ((void *)(uintptr_t)lo->rl_pid); 2327 } 2328 2329 static bool_t 2330 pid_compare(rfs4_entry_t u_entry, void *key) 2331 { 2332 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2333 2334 return (lo->rl_pid == (pid_t)(uintptr_t)key); 2335 } 2336 2337 static void 2338 rfs4_lockowner_destroy(rfs4_entry_t u_entry) 2339 { 2340 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2341 2342 /* Free the lock owner id */ 2343 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len); 2344 rfs4_client_rele(lo->rl_client); 2345 } 2346 2347 void 2348 rfs4_lockowner_rele(rfs4_lockowner_t *lo) 2349 { 2350 rfs4_dbe_rele(lo->rl_dbe); 2351 } 2352 2353 /* ARGSUSED */ 2354 static bool_t 2355 rfs4_lockowner_expiry(rfs4_entry_t u_entry) 2356 { 2357 /* 2358 * Since expiry is called with no other references on 2359 * this struct, go ahead and have it removed. 
2360 */ 2361 return (TRUE); 2362 } 2363 2364 static bool_t 2365 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg) 2366 { 2367 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry; 2368 lock_owner4 *lockowner = (lock_owner4 *)arg; 2369 rfs4_client_t *cp; 2370 bool_t create = FALSE; 2371 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2372 2373 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER); 2374 2375 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, 2376 &lockowner->clientid, 2377 &create, NULL, RFS4_DBS_VALID); 2378 2379 rw_exit(&nsrv4->rfs4_findclient_lock); 2380 2381 if (cp == NULL) 2382 return (FALSE); 2383 2384 /* Reference client */ 2385 lo->rl_client = cp; 2386 lo->rl_owner.clientid = lockowner->clientid; 2387 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP); 2388 bcopy(lockowner->owner_val, lo->rl_owner.owner_val, 2389 lockowner->owner_len); 2390 lo->rl_owner.owner_len = lockowner->owner_len; 2391 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe); 2392 2393 return (TRUE); 2394 } 2395 2396 rfs4_lockowner_t * 2397 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create) 2398 { 2399 rfs4_lockowner_t *lo; 2400 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2401 2402 /* CSTYLED */ 2403 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner, 2404 create, lockowner, RFS4_DBS_VALID); 2405 2406 return (lo); 2407 } 2408 2409 rfs4_lockowner_t * 2410 rfs4_findlockowner_by_pid(pid_t pid) 2411 { 2412 rfs4_lockowner_t *lo; 2413 bool_t create = FALSE; 2414 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2415 2416 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx, 2417 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID); 2418 2419 return (lo); 2420 } 2421 2422 2423 static uint32_t 2424 file_hash(void *key) 2425 { 2426 return (ADDRHASH(key)); 2427 } 2428 2429 static void * 2430 file_mkkey(rfs4_entry_t u_entry) 2431 { 2432 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2433 2434 return (fp->rf_vp); 2435 } 2436 2437 static bool_t 2438 
file_compare(rfs4_entry_t u_entry, void *key) 2439 { 2440 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2441 2442 return (fp->rf_vp == (vnode_t *)key); 2443 } 2444 2445 static void 2446 rfs4_file_destroy(rfs4_entry_t u_entry) 2447 { 2448 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2449 2450 list_destroy(&fp->rf_delegstatelist); 2451 2452 if (fp->rf_filehandle.nfs_fh4_val) 2453 kmem_free(fp->rf_filehandle.nfs_fh4_val, 2454 fp->rf_filehandle.nfs_fh4_len); 2455 cv_destroy(fp->rf_dinfo.rd_recall_cv); 2456 if (fp->rf_vp) { 2457 vnode_t *vp = fp->rf_vp; 2458 2459 mutex_enter(&vp->v_vsd_lock); 2460 (void) vsd_set(vp, nfs4_srv_vkey, NULL); 2461 mutex_exit(&vp->v_vsd_lock); 2462 VN_RELE(vp); 2463 fp->rf_vp = NULL; 2464 } 2465 rw_destroy(&fp->rf_file_rwlock); 2466 } 2467 2468 /* 2469 * Used to unlock the underlying dbe struct only 2470 */ 2471 void 2472 rfs4_file_rele(rfs4_file_t *fp) 2473 { 2474 rfs4_dbe_rele(fp->rf_dbe); 2475 } 2476 2477 typedef struct { 2478 vnode_t *vp; 2479 nfs_fh4 *fh; 2480 } rfs4_fcreate_arg; 2481 2482 static bool_t 2483 rfs4_file_create(rfs4_entry_t u_entry, void *arg) 2484 { 2485 rfs4_file_t *fp = (rfs4_file_t *)u_entry; 2486 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg; 2487 vnode_t *vp = ap->vp; 2488 nfs_fh4 *fh = ap->fh; 2489 2490 VN_HOLD(vp); 2491 2492 fp->rf_filehandle.nfs_fh4_len = 0; 2493 fp->rf_filehandle.nfs_fh4_val = NULL; 2494 ASSERT(fh && fh->nfs_fh4_len); 2495 if (fh && fh->nfs_fh4_len) { 2496 fp->rf_filehandle.nfs_fh4_val = 2497 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP); 2498 nfs_fh4_copy(fh, &fp->rf_filehandle); 2499 } 2500 fp->rf_vp = vp; 2501 2502 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t), 2503 offsetof(rfs4_deleg_state_t, rds_node)); 2504 2505 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0; 2506 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0; 2507 2508 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL); 2509 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL); 
2510 2511 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE; 2512 2513 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL); 2514 2515 mutex_enter(&vp->v_vsd_lock); 2516 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0); 2517 mutex_exit(&vp->v_vsd_lock); 2518 2519 return (TRUE); 2520 } 2521 2522 rfs4_file_t * 2523 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create) 2524 { 2525 rfs4_file_t *fp; 2526 rfs4_fcreate_arg arg; 2527 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2528 2529 arg.vp = vp; 2530 arg.fh = fh; 2531 2532 if (*create == TRUE) 2533 /* CSTYLED */ 2534 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create, 2535 &arg, RFS4_DBS_VALID); 2536 else { 2537 mutex_enter(&vp->v_vsd_lock); 2538 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey); 2539 if (fp) { 2540 rfs4_dbe_lock(fp->rf_dbe); 2541 if (rfs4_dbe_is_invalid(fp->rf_dbe) || 2542 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) { 2543 rfs4_dbe_unlock(fp->rf_dbe); 2544 fp = NULL; 2545 } else { 2546 rfs4_dbe_hold(fp->rf_dbe); 2547 rfs4_dbe_unlock(fp->rf_dbe); 2548 } 2549 } 2550 mutex_exit(&vp->v_vsd_lock); 2551 } 2552 return (fp); 2553 } 2554 2555 /* 2556 * Find a file in the db and once it is located, take the rw lock. 2557 * Need to check the vnode pointer and if it does not exist (it was 2558 * removed between the db location and check) redo the find. This 2559 * assumes that a file struct that has a NULL vnode pointer is marked 2560 * at 'invalid' and will not be found in the db the second time 2561 * around. 
2562 */ 2563 rfs4_file_t * 2564 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create) 2565 { 2566 rfs4_file_t *fp; 2567 rfs4_fcreate_arg arg; 2568 bool_t screate = *create; 2569 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2570 2571 if (screate == FALSE) { 2572 mutex_enter(&vp->v_vsd_lock); 2573 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey); 2574 if (fp) { 2575 rfs4_dbe_lock(fp->rf_dbe); 2576 if (rfs4_dbe_is_invalid(fp->rf_dbe) || 2577 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) { 2578 rfs4_dbe_unlock(fp->rf_dbe); 2579 mutex_exit(&vp->v_vsd_lock); 2580 fp = NULL; 2581 } else { 2582 rfs4_dbe_hold(fp->rf_dbe); 2583 rfs4_dbe_unlock(fp->rf_dbe); 2584 mutex_exit(&vp->v_vsd_lock); 2585 rw_enter(&fp->rf_file_rwlock, RW_WRITER); 2586 if (fp->rf_vp == NULL) { 2587 rw_exit(&fp->rf_file_rwlock); 2588 rfs4_file_rele(fp); 2589 fp = NULL; 2590 } 2591 } 2592 } else { 2593 mutex_exit(&vp->v_vsd_lock); 2594 } 2595 } else { 2596 retry: 2597 arg.vp = vp; 2598 arg.fh = fh; 2599 2600 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, 2601 create, &arg, RFS4_DBS_VALID); 2602 if (fp != NULL) { 2603 rw_enter(&fp->rf_file_rwlock, RW_WRITER); 2604 if (fp->rf_vp == NULL) { 2605 rw_exit(&fp->rf_file_rwlock); 2606 rfs4_file_rele(fp); 2607 *create = screate; 2608 goto retry; 2609 } 2610 } 2611 } 2612 2613 return (fp); 2614 } 2615 2616 static uint32_t 2617 lo_state_hash(void *key) 2618 { 2619 stateid_t *id = key; 2620 2621 return (id->bits.ident+id->bits.pid); 2622 } 2623 2624 static bool_t 2625 lo_state_compare(rfs4_entry_t u_entry, void *key) 2626 { 2627 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2628 stateid_t *id = key; 2629 bool_t rc; 2630 2631 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime && 2632 lsp->rls_lockid.bits.type == id->bits.type && 2633 lsp->rls_lockid.bits.ident == id->bits.ident && 2634 lsp->rls_lockid.bits.pid == id->bits.pid); 2635 2636 return (rc); 2637 } 2638 2639 static void * 2640 lo_state_mkkey(rfs4_entry_t u_entry) 2641 { 2642 rfs4_lo_state_t *lsp 
= (rfs4_lo_state_t *)u_entry; 2643 2644 return (&lsp->rls_lockid); 2645 } 2646 2647 static bool_t 2648 rfs4_lo_state_expiry(rfs4_entry_t u_entry) 2649 { 2650 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2651 2652 if (rfs4_dbe_is_invalid(lsp->rls_dbe)) 2653 return (TRUE); 2654 if (lsp->rls_state->rs_closed) 2655 return (TRUE); 2656 return ((gethrestime_sec() - 2657 lsp->rls_state->rs_owner->ro_client->rc_last_access 2658 > rfs4_lease_time)); 2659 } 2660 2661 static void 2662 rfs4_lo_state_destroy(rfs4_entry_t u_entry) 2663 { 2664 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2665 2666 rfs4_dbe_lock(lsp->rls_state->rs_dbe); 2667 list_remove(&lsp->rls_state->rs_lostatelist, lsp); 2668 rfs4_dbe_unlock(lsp->rls_state->rs_dbe); 2669 2670 rfs4_sw_destroy(&lsp->rls_sw); 2671 2672 /* Make sure to release the file locks */ 2673 if (lsp->rls_locks_cleaned == FALSE) { 2674 lsp->rls_locks_cleaned = TRUE; 2675 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) { 2676 /* Is the PxFS kernel module loaded? */ 2677 if (lm_remove_file_locks != NULL) { 2678 int new_sysid; 2679 2680 /* Encode the cluster nodeid in new sysid */ 2681 new_sysid = 2682 lsp->rls_locker->rl_client->rc_sysidt; 2683 lm_set_nlmid_flk(&new_sysid); 2684 2685 /* 2686 * This PxFS routine removes file locks for a 2687 * client over all nodes of a cluster. 
2688 */ 2689 DTRACE_PROBE1(nfss_i_clust_rm_lck, 2690 int, new_sysid); 2691 (*lm_remove_file_locks)(new_sysid); 2692 } else { 2693 (void) cleanlocks( 2694 lsp->rls_state->rs_finfo->rf_vp, 2695 lsp->rls_locker->rl_pid, 2696 lsp->rls_locker->rl_client->rc_sysidt); 2697 } 2698 } 2699 } 2700 2701 /* Free the last reply for this state */ 2702 rfs4_free_reply(&lsp->rls_reply); 2703 2704 rfs4_lockowner_rele(lsp->rls_locker); 2705 lsp->rls_locker = NULL; 2706 2707 rfs4_state_rele_nounlock(lsp->rls_state); 2708 lsp->rls_state = NULL; 2709 } 2710 2711 static bool_t 2712 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg) 2713 { 2714 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2715 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg; 2716 rfs4_lockowner_t *lo = argp->rls_locker; 2717 rfs4_state_t *sp = argp->rls_state; 2718 2719 lsp->rls_state = sp; 2720 2721 lsp->rls_lockid = sp->rs_stateid; 2722 lsp->rls_lockid.bits.type = LOCKID; 2723 lsp->rls_lockid.bits.chgseq = 0; 2724 lsp->rls_lockid.bits.pid = lo->rl_pid; 2725 2726 lsp->rls_locks_cleaned = FALSE; 2727 lsp->rls_lock_completed = FALSE; 2728 2729 rfs4_sw_init(&lsp->rls_sw); 2730 2731 /* Attached the supplied lock owner */ 2732 rfs4_dbe_hold(lo->rl_dbe); 2733 lsp->rls_locker = lo; 2734 2735 rfs4_dbe_lock(sp->rs_dbe); 2736 list_insert_tail(&sp->rs_lostatelist, lsp); 2737 rfs4_dbe_hold(sp->rs_dbe); 2738 rfs4_dbe_unlock(sp->rs_dbe); 2739 2740 return (TRUE); 2741 } 2742 2743 void 2744 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp) 2745 { 2746 if (unlock_fp == TRUE) 2747 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock); 2748 rfs4_dbe_rele(lsp->rls_dbe); 2749 } 2750 2751 static rfs4_lo_state_t * 2752 rfs4_findlo_state(stateid_t *id, bool_t lock_fp) 2753 { 2754 rfs4_lo_state_t *lsp; 2755 bool_t create = FALSE; 2756 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2757 2758 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id, 2759 &create, NULL, RFS4_DBS_VALID); 2760 if (lock_fp == TRUE && lsp != NULL) 2761 
rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER); 2762 2763 return (lsp); 2764 } 2765 2766 2767 static uint32_t 2768 lo_state_lo_hash(void *key) 2769 { 2770 rfs4_lo_state_t *lsp = key; 2771 2772 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state)); 2773 } 2774 2775 static bool_t 2776 lo_state_lo_compare(rfs4_entry_t u_entry, void *key) 2777 { 2778 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry; 2779 rfs4_lo_state_t *keyp = key; 2780 2781 return (keyp->rls_locker == lsp->rls_locker && 2782 keyp->rls_state == lsp->rls_state); 2783 } 2784 2785 static void * 2786 lo_state_lo_mkkey(rfs4_entry_t u_entry) 2787 { 2788 return (u_entry); 2789 } 2790 2791 rfs4_lo_state_t * 2792 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp, 2793 bool_t *create) 2794 { 2795 rfs4_lo_state_t *lsp; 2796 rfs4_lo_state_t arg; 2797 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 2798 2799 arg.rls_locker = lo; 2800 arg.rls_state = sp; 2801 2802 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx, 2803 &arg, create, &arg, RFS4_DBS_VALID); 2804 2805 return (lsp); 2806 } 2807 2808 static stateid_t 2809 get_stateid(id_t eid) 2810 { 2811 stateid_t id; 2812 nfs4_srv_t *nsrv4; 2813 2814 nsrv4 = nfs4_get_srv(); 2815 2816 id.bits.boottime = nsrv4->rfs4_start_time; 2817 id.bits.ident = eid; 2818 id.bits.chgseq = 0; 2819 id.bits.type = 0; 2820 id.bits.pid = 0; 2821 2822 /* 2823 * If we are booted as a cluster node, embed our nodeid. 2824 * We've already done sanity checks in rfs4_client_create() so no 2825 * need to repeat them here. 2826 */ 2827 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ? 2828 clconf_get_nodeid() : 0; 2829 2830 return (id); 2831 } 2832 2833 /* 2834 * For use only when booted as a cluster node. 2835 * Returns TRUE if the embedded nodeid indicates that this stateid was 2836 * generated on another node. 
2837 */ 2838 static int 2839 foreign_stateid(stateid_t *id) 2840 { 2841 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2842 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid()); 2843 } 2844 2845 /* 2846 * For use only when booted as a cluster node. 2847 * Returns TRUE if the embedded nodeid indicates that this clientid was 2848 * generated on another node. 2849 */ 2850 static int 2851 foreign_clientid(cid *cidp) 2852 { 2853 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2854 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT != 2855 (uint32_t)clconf_get_nodeid()); 2856 } 2857 2858 /* 2859 * For use only when booted as a cluster node. 2860 * Embed our cluster nodeid into the clientid. 2861 */ 2862 static void 2863 embed_nodeid(cid *cidp) 2864 { 2865 int clnodeid; 2866 /* 2867 * Currently, our state tables are small enough that their 2868 * ids will leave enough bits free for the nodeid. If the 2869 * tables become larger, we mustn't overwrite the id. 2870 * Equally, we only have room for so many bits of nodeid, so 2871 * must check that too. 
2872 */ 2873 ASSERT(cluster_bootflags & CLUSTER_BOOTED); 2874 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0); 2875 clnodeid = clconf_get_nodeid(); 2876 ASSERT(clnodeid <= CLUSTER_MAX_NODEID); 2877 ASSERT(clnodeid != NODEID_UNKNOWN); 2878 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT); 2879 } 2880 2881 static uint32_t 2882 state_hash(void *key) 2883 { 2884 stateid_t *ip = (stateid_t *)key; 2885 2886 return (ip->bits.ident); 2887 } 2888 2889 static bool_t 2890 state_compare(rfs4_entry_t u_entry, void *key) 2891 { 2892 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2893 stateid_t *id = (stateid_t *)key; 2894 bool_t rc; 2895 2896 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime && 2897 sp->rs_stateid.bits.ident == id->bits.ident); 2898 2899 return (rc); 2900 } 2901 2902 static void * 2903 state_mkkey(rfs4_entry_t u_entry) 2904 { 2905 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2906 2907 return (&sp->rs_stateid); 2908 } 2909 2910 static void 2911 rfs4_state_destroy(rfs4_entry_t u_entry) 2912 { 2913 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 2914 2915 /* remove from openowner list */ 2916 rfs4_dbe_lock(sp->rs_owner->ro_dbe); 2917 list_remove(&sp->rs_owner->ro_statelist, sp); 2918 rfs4_dbe_unlock(sp->rs_owner->ro_dbe); 2919 2920 list_destroy(&sp->rs_lostatelist); 2921 2922 /* release any share locks for this stateid if it's still open */ 2923 if (!sp->rs_closed) { 2924 rfs4_dbe_lock(sp->rs_dbe); 2925 (void) rfs4_unshare(sp); 2926 rfs4_dbe_unlock(sp->rs_dbe); 2927 } 2928 2929 /* Were done with the file */ 2930 rfs4_file_rele(sp->rs_finfo); 2931 sp->rs_finfo = NULL; 2932 2933 /* And now with the openowner */ 2934 rfs4_openowner_rele(sp->rs_owner); 2935 sp->rs_owner = NULL; 2936 } 2937 2938 static void 2939 rfs4_state_rele_nounlock(rfs4_state_t *sp) 2940 { 2941 rfs4_dbe_rele(sp->rs_dbe); 2942 } 2943 2944 void 2945 rfs4_state_rele(rfs4_state_t *sp) 2946 { 2947 rw_exit(&sp->rs_finfo->rf_file_rwlock); 2948 rfs4_dbe_rele(sp->rs_dbe); 2949 } 2950 2951 
static uint32_t 2952 deleg_hash(void *key) 2953 { 2954 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key; 2955 2956 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo)); 2957 } 2958 2959 static bool_t 2960 deleg_compare(rfs4_entry_t u_entry, void *key) 2961 { 2962 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2963 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key; 2964 2965 return (dsp->rds_client == kdsp->rds_client && 2966 dsp->rds_finfo == kdsp->rds_finfo); 2967 } 2968 2969 static void * 2970 deleg_mkkey(rfs4_entry_t u_entry) 2971 { 2972 return (u_entry); 2973 } 2974 2975 static uint32_t 2976 deleg_state_hash(void *key) 2977 { 2978 stateid_t *ip = (stateid_t *)key; 2979 2980 return (ip->bits.ident); 2981 } 2982 2983 static bool_t 2984 deleg_state_compare(rfs4_entry_t u_entry, void *key) 2985 { 2986 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 2987 stateid_t *id = (stateid_t *)key; 2988 bool_t rc; 2989 2990 if (id->bits.type != DELEGID) 2991 return (FALSE); 2992 2993 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime && 2994 dsp->rds_delegid.bits.ident == id->bits.ident); 2995 2996 return (rc); 2997 } 2998 2999 static void * 3000 deleg_state_mkkey(rfs4_entry_t u_entry) 3001 { 3002 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 3003 3004 return (&dsp->rds_delegid); 3005 } 3006 3007 static bool_t 3008 rfs4_deleg_state_expiry(rfs4_entry_t u_entry) 3009 { 3010 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 3011 3012 if (rfs4_dbe_is_invalid(dsp->rds_dbe)) 3013 return (TRUE); 3014 3015 if (dsp->rds_dtype == OPEN_DELEGATE_NONE) 3016 return (TRUE); 3017 3018 if ((gethrestime_sec() - dsp->rds_client->rc_last_access 3019 > rfs4_lease_time)) { 3020 rfs4_dbe_invalidate(dsp->rds_dbe); 3021 return (TRUE); 3022 } 3023 3024 return (FALSE); 3025 } 3026 3027 static bool_t 3028 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp) 3029 { 3030 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 3031 rfs4_file_t 
*fp = ((rfs4_deleg_state_t *)argp)->rds_finfo; 3032 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client; 3033 3034 rfs4_dbe_hold(fp->rf_dbe); 3035 rfs4_dbe_hold(cp->rc_dbe); 3036 3037 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe)); 3038 dsp->rds_delegid.bits.type = DELEGID; 3039 dsp->rds_finfo = fp; 3040 dsp->rds_client = cp; 3041 dsp->rds_dtype = OPEN_DELEGATE_NONE; 3042 3043 dsp->rds_time_granted = gethrestime_sec(); /* observability */ 3044 dsp->rds_time_revoked = 0; 3045 3046 list_link_init(&dsp->rds_node); 3047 3048 return (TRUE); 3049 } 3050 3051 static void 3052 rfs4_deleg_state_destroy(rfs4_entry_t u_entry) 3053 { 3054 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 3055 3056 /* return delegation if necessary */ 3057 rfs4_return_deleg(dsp, FALSE); 3058 3059 /* Were done with the file */ 3060 rfs4_file_rele(dsp->rds_finfo); 3061 dsp->rds_finfo = NULL; 3062 3063 /* And now with the openowner */ 3064 rfs4_client_rele(dsp->rds_client); 3065 dsp->rds_client = NULL; 3066 } 3067 3068 rfs4_deleg_state_t * 3069 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create) 3070 { 3071 rfs4_deleg_state_t ds, *dsp; 3072 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 3073 3074 ds.rds_client = sp->rs_owner->ro_client; 3075 ds.rds_finfo = sp->rs_finfo; 3076 3077 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds, 3078 create, &ds, RFS4_DBS_VALID); 3079 3080 return (dsp); 3081 } 3082 3083 rfs4_deleg_state_t * 3084 rfs4_finddelegstate(stateid_t *id) 3085 { 3086 rfs4_deleg_state_t *dsp; 3087 bool_t create = FALSE; 3088 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 3089 3090 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx, 3091 id, &create, NULL, RFS4_DBS_VALID); 3092 3093 return (dsp); 3094 } 3095 3096 void 3097 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp) 3098 { 3099 rfs4_dbe_rele(dsp->rds_dbe); 3100 } 3101 3102 void 3103 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp) 3104 { 3105 3106 rfs4_dbe_lock(lsp->rls_dbe); 3107 3108 /* 3109 * 
If we are skipping sequence id checking, this means that 3110 * this is the first lock request and therefore the sequence 3111 * id does not need to be updated. This only happens on the 3112 * first lock request for a lockowner 3113 */ 3114 if (!lsp->rls_skip_seqid_check) 3115 lsp->rls_seqid++; 3116 3117 rfs4_dbe_unlock(lsp->rls_dbe); 3118 } 3119 3120 void 3121 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp) 3122 { 3123 3124 rfs4_dbe_lock(lsp->rls_dbe); 3125 3126 rfs4_free_reply(&lsp->rls_reply); 3127 3128 rfs4_copy_reply(&lsp->rls_reply, resp); 3129 3130 rfs4_dbe_unlock(lsp->rls_dbe); 3131 } 3132 3133 void 3134 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate, 3135 bool_t close_of_client) 3136 { 3137 rfs4_state_t *sp; 3138 3139 rfs4_dbe_lock(oo->ro_dbe); 3140 3141 for (sp = list_head(&oo->ro_statelist); sp != NULL; 3142 sp = list_next(&oo->ro_statelist, sp)) { 3143 rfs4_state_close(sp, FALSE, close_of_client, CRED()); 3144 if (invalidate == TRUE) 3145 rfs4_dbe_invalidate(sp->rs_dbe); 3146 } 3147 3148 rfs4_dbe_invalidate(oo->ro_dbe); 3149 rfs4_dbe_unlock(oo->ro_dbe); 3150 } 3151 3152 static uint32_t 3153 state_owner_file_hash(void *key) 3154 { 3155 rfs4_state_t *sp = key; 3156 3157 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo)); 3158 } 3159 3160 static bool_t 3161 state_owner_file_compare(rfs4_entry_t u_entry, void *key) 3162 { 3163 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3164 rfs4_state_t *arg = key; 3165 3166 if (sp->rs_closed == TRUE) 3167 return (FALSE); 3168 3169 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo); 3170 } 3171 3172 static void * 3173 state_owner_file_mkkey(rfs4_entry_t u_entry) 3174 { 3175 return (u_entry); 3176 } 3177 3178 static uint32_t 3179 state_file_hash(void *key) 3180 { 3181 return (ADDRHASH(key)); 3182 } 3183 3184 static bool_t 3185 state_file_compare(rfs4_entry_t u_entry, void *key) 3186 { 3187 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3188 rfs4_file_t *fp = key; 3189 3190 if 
(sp->rs_closed == TRUE) 3191 return (FALSE); 3192 3193 return (fp == sp->rs_finfo); 3194 } 3195 3196 static void * 3197 state_file_mkkey(rfs4_entry_t u_entry) 3198 { 3199 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3200 3201 return (sp->rs_finfo); 3202 } 3203 3204 rfs4_state_t * 3205 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp, 3206 bool_t *create) 3207 { 3208 rfs4_state_t *sp; 3209 rfs4_state_t key; 3210 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 3211 3212 key.rs_owner = oo; 3213 key.rs_finfo = fp; 3214 3215 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx, 3216 &key, create, &key, RFS4_DBS_VALID); 3217 3218 return (sp); 3219 } 3220 3221 /* This returns ANY state struct that refers to this file */ 3222 static rfs4_state_t * 3223 rfs4_findstate_by_file(rfs4_file_t *fp) 3224 { 3225 bool_t create = FALSE; 3226 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 3227 3228 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp, 3229 &create, fp, RFS4_DBS_VALID)); 3230 } 3231 3232 static bool_t 3233 rfs4_state_expiry(rfs4_entry_t u_entry) 3234 { 3235 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3236 3237 if (rfs4_dbe_is_invalid(sp->rs_dbe)) 3238 return (TRUE); 3239 3240 if (sp->rs_closed == TRUE && 3241 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe)) 3242 > rfs4_lease_time)) 3243 return (TRUE); 3244 3245 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access 3246 > rfs4_lease_time)); 3247 } 3248 3249 static bool_t 3250 rfs4_state_create(rfs4_entry_t u_entry, void *argp) 3251 { 3252 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 3253 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo; 3254 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner; 3255 3256 rfs4_dbe_hold(fp->rf_dbe); 3257 rfs4_dbe_hold(oo->ro_dbe); 3258 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe)); 3259 sp->rs_stateid.bits.type = OPENID; 3260 sp->rs_owner = oo; 3261 sp->rs_finfo = fp; 3262 3263 list_create(&sp->rs_lostatelist, sizeof 
(rfs4_lo_state_t), 3264 offsetof(rfs4_lo_state_t, rls_node)); 3265 3266 /* Insert state on per open owner's list */ 3267 rfs4_dbe_lock(oo->ro_dbe); 3268 list_insert_tail(&oo->ro_statelist, sp); 3269 rfs4_dbe_unlock(oo->ro_dbe); 3270 3271 return (TRUE); 3272 } 3273 3274 static rfs4_state_t * 3275 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp) 3276 { 3277 rfs4_state_t *sp; 3278 bool_t create = FALSE; 3279 nfs4_srv_t *nsrv4 = nfs4_get_srv(); 3280 3281 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id, 3282 &create, NULL, find_invalid); 3283 if (lock_fp == TRUE && sp != NULL) 3284 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER); 3285 3286 return (sp); 3287 } 3288 3289 void 3290 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client, 3291 cred_t *cr) 3292 { 3293 /* Remove the associated lo_state owners */ 3294 if (!lock_held) 3295 rfs4_dbe_lock(sp->rs_dbe); 3296 3297 /* 3298 * If refcnt == 0, the dbe is about to be destroyed. 3299 * lock state will be released by the reaper thread. 3300 */ 3301 3302 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) { 3303 if (sp->rs_closed == FALSE) { 3304 rfs4_release_share_lock_state(sp, cr, close_of_client); 3305 sp->rs_closed = TRUE; 3306 } 3307 } 3308 3309 if (!lock_held) 3310 rfs4_dbe_unlock(sp->rs_dbe); 3311 } 3312 3313 /* 3314 * Remove all state associated with the given client. 3315 */ 3316 void 3317 rfs4_client_state_remove(rfs4_client_t *cp) 3318 { 3319 rfs4_openowner_t *oo; 3320 3321 rfs4_dbe_lock(cp->rc_dbe); 3322 3323 for (oo = list_head(&cp->rc_openownerlist); oo != NULL; 3324 oo = list_next(&cp->rc_openownerlist, oo)) { 3325 rfs4_free_opens(oo, TRUE, TRUE); 3326 } 3327 3328 rfs4_dbe_unlock(cp->rc_dbe); 3329 } 3330 3331 void 3332 rfs4_client_close(rfs4_client_t *cp) 3333 { 3334 /* Mark client as going away. 
*/ 3335 rfs4_dbe_lock(cp->rc_dbe); 3336 rfs4_dbe_invalidate(cp->rc_dbe); 3337 rfs4_dbe_unlock(cp->rc_dbe); 3338 3339 rfs4_client_state_remove(cp); 3340 3341 /* Release the client */ 3342 rfs4_client_rele(cp); 3343 } 3344 3345 nfsstat4 3346 rfs4_check_clientid(clientid4 *cp, int setclid_confirm) 3347 { 3348 cid *cidp = (cid *) cp; 3349 nfs4_srv_t *nsrv4; 3350 3351 nsrv4 = nfs4_get_srv(); 3352 3353 /* 3354 * If we are booted as a cluster node, check the embedded nodeid. 3355 * If it indicates that this clientid was generated on another node, 3356 * inform the client accordingly. 3357 */ 3358 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp)) 3359 return (NFS4ERR_STALE_CLIENTID); 3360 3361 /* 3362 * If the server start time matches the time provided 3363 * by the client (via the clientid) and this is NOT a 3364 * setclientid_confirm then return EXPIRED. 3365 */ 3366 if (!setclid_confirm && 3367 cidp->impl_id.start_time == nsrv4->rfs4_start_time) 3368 return (NFS4ERR_EXPIRED); 3369 3370 return (NFS4ERR_STALE_CLIENTID); 3371 } 3372 3373 /* 3374 * This is used when a stateid has not been found amongst the 3375 * current server's state. Check the stateid to see if it 3376 * was from this server instantiation or not. 3377 */ 3378 static nfsstat4 3379 what_stateid_error(stateid_t *id, stateid_type_t type) 3380 { 3381 nfs4_srv_t *nsrv4; 3382 3383 nsrv4 = nfs4_get_srv(); 3384 3385 /* If we are booted as a cluster node, was stateid locally generated? */ 3386 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3387 return (NFS4ERR_STALE_STATEID); 3388 3389 /* If types don't match then no use checking further */ 3390 if (type != id->bits.type) 3391 return (NFS4ERR_BAD_STATEID); 3392 3393 /* From a different server instantiation, return STALE */ 3394 if (id->bits.boottime != nsrv4->rfs4_start_time) 3395 return (NFS4ERR_STALE_STATEID); 3396 3397 /* 3398 * From this server but the state is most likely beyond lease 3399 * timeout: return NFS4ERR_EXPIRED. 
However, there is the 3400 * case of a delegation stateid. For delegations, there is a 3401 * case where the state can be removed without the client's 3402 * knowledge/consent: revocation. In the case of delegation 3403 * revocation, the delegation state will be removed and will 3404 * not be found. If the client does something like a 3405 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid 3406 * that has been revoked, the server should return BAD_STATEID 3407 * instead of the more common EXPIRED error. 3408 */ 3409 if (id->bits.boottime == nsrv4->rfs4_start_time) { 3410 if (type == DELEGID) 3411 return (NFS4ERR_BAD_STATEID); 3412 else 3413 return (NFS4ERR_EXPIRED); 3414 } 3415 3416 return (NFS4ERR_BAD_STATEID); 3417 } 3418 3419 /* 3420 * Used later on to find the various state structs. When called from 3421 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is 3422 * taken (it is not needed) and helps on the read/write path with 3423 * respect to performance. 3424 */ 3425 static nfsstat4 3426 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp, 3427 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp) 3428 { 3429 stateid_t *id = (stateid_t *)stateid; 3430 rfs4_state_t *sp; 3431 3432 *spp = NULL; 3433 3434 /* If we are booted as a cluster node, was stateid locally generated? 
*/ 3435 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3436 return (NFS4ERR_STALE_STATEID); 3437 3438 sp = rfs4_findstate(id, find_invalid, lock_fp); 3439 if (sp == NULL) { 3440 return (what_stateid_error(id, OPENID)); 3441 } 3442 3443 if (rfs4_lease_expired(sp->rs_owner->ro_client)) { 3444 if (lock_fp == TRUE) 3445 rfs4_state_rele(sp); 3446 else 3447 rfs4_state_rele_nounlock(sp); 3448 return (NFS4ERR_EXPIRED); 3449 } 3450 3451 *spp = sp; 3452 3453 return (NFS4_OK); 3454 } 3455 3456 nfsstat4 3457 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp, 3458 rfs4_dbsearch_type_t find_invalid) 3459 { 3460 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE)); 3461 } 3462 3463 int 3464 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid) 3465 { 3466 stateid_t *id = (stateid_t *)stateid; 3467 3468 if (rfs4_lease_expired(sp->rs_owner->ro_client)) 3469 return (NFS4_CHECK_STATEID_EXPIRED); 3470 3471 /* Stateid is some time in the future - that's bad */ 3472 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq) 3473 return (NFS4_CHECK_STATEID_BAD); 3474 3475 if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1) 3476 return (NFS4_CHECK_STATEID_REPLAY); 3477 3478 /* Stateid is some time in the past - that's old */ 3479 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq) 3480 return (NFS4_CHECK_STATEID_OLD); 3481 3482 /* Caller needs to know about confirmation before closure */ 3483 if (sp->rs_owner->ro_need_confirm) 3484 return (NFS4_CHECK_STATEID_UNCONFIRMED); 3485 3486 if (sp->rs_closed == TRUE) 3487 return (NFS4_CHECK_STATEID_CLOSED); 3488 3489 return (NFS4_CHECK_STATEID_OKAY); 3490 } 3491 3492 int 3493 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid) 3494 { 3495 stateid_t *id = (stateid_t *)stateid; 3496 3497 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) 3498 return (NFS4_CHECK_STATEID_EXPIRED); 3499 3500 /* Stateid is some time in the future - that's bad */ 3501 if (lsp->rls_lockid.bits.chgseq < 
id->bits.chgseq) 3502 return (NFS4_CHECK_STATEID_BAD); 3503 3504 if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1) 3505 return (NFS4_CHECK_STATEID_REPLAY); 3506 3507 /* Stateid is some time in the past - that's old */ 3508 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq) 3509 return (NFS4_CHECK_STATEID_OLD); 3510 3511 if (lsp->rls_state->rs_closed == TRUE) 3512 return (NFS4_CHECK_STATEID_CLOSED); 3513 3514 return (NFS4_CHECK_STATEID_OKAY); 3515 } 3516 3517 nfsstat4 3518 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp) 3519 { 3520 stateid_t *id = (stateid_t *)stateid; 3521 rfs4_deleg_state_t *dsp; 3522 3523 *dspp = NULL; 3524 3525 /* If we are booted as a cluster node, was stateid locally generated? */ 3526 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3527 return (NFS4ERR_STALE_STATEID); 3528 3529 dsp = rfs4_finddelegstate(id); 3530 if (dsp == NULL) { 3531 return (what_stateid_error(id, DELEGID)); 3532 } 3533 3534 if (rfs4_lease_expired(dsp->rds_client)) { 3535 rfs4_deleg_state_rele(dsp); 3536 return (NFS4ERR_EXPIRED); 3537 } 3538 3539 *dspp = dsp; 3540 3541 return (NFS4_OK); 3542 } 3543 3544 nfsstat4 3545 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp) 3546 { 3547 stateid_t *id = (stateid_t *)stateid; 3548 rfs4_lo_state_t *lsp; 3549 3550 *lspp = NULL; 3551 3552 /* If we are booted as a cluster node, was stateid locally generated? 
*/ 3553 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id)) 3554 return (NFS4ERR_STALE_STATEID); 3555 3556 lsp = rfs4_findlo_state(id, lock_fp); 3557 if (lsp == NULL) { 3558 return (what_stateid_error(id, LOCKID)); 3559 } 3560 3561 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) { 3562 rfs4_lo_state_rele(lsp, lock_fp); 3563 return (NFS4ERR_EXPIRED); 3564 } 3565 3566 *lspp = lsp; 3567 3568 return (NFS4_OK); 3569 } 3570 3571 static nfsstat4 3572 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp, 3573 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp) 3574 { 3575 rfs4_state_t *sp = NULL; 3576 rfs4_deleg_state_t *dsp = NULL; 3577 rfs4_lo_state_t *lsp = NULL; 3578 stateid_t *id; 3579 nfsstat4 status; 3580 3581 *spp = NULL; *dspp = NULL; *lspp = NULL; 3582 3583 id = (stateid_t *)sid; 3584 switch (id->bits.type) { 3585 case OPENID: 3586 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE); 3587 break; 3588 case DELEGID: 3589 status = rfs4_get_deleg_state(sid, &dsp); 3590 break; 3591 case LOCKID: 3592 status = rfs4_get_lo_state(sid, &lsp, FALSE); 3593 if (status == NFS4_OK) { 3594 sp = lsp->rls_state; 3595 rfs4_dbe_hold(sp->rs_dbe); 3596 } 3597 break; 3598 default: 3599 status = NFS4ERR_BAD_STATEID; 3600 } 3601 3602 if (status == NFS4_OK) { 3603 *spp = sp; 3604 *dspp = dsp; 3605 *lspp = lsp; 3606 } 3607 3608 return (status); 3609 } 3610 3611 /* 3612 * Given the I/O mode (FREAD or FWRITE), this checks whether the 3613 * rfs4_state_t struct has access to do this operation and if so 3614 * return NFS4_OK; otherwise the proper NFSv4 error is returned. 
3615 */ 3616 nfsstat4 3617 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp) 3618 { 3619 nfsstat4 stat = NFS4_OK; 3620 rfs4_file_t *fp; 3621 bool_t create = FALSE; 3622 3623 rfs4_dbe_lock(sp->rs_dbe); 3624 if (mode == FWRITE) { 3625 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) { 3626 stat = NFS4ERR_OPENMODE; 3627 } 3628 } else if (mode == FREAD) { 3629 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) { 3630 /* 3631 * If we have OPENed the file with DENYing access 3632 * to both READ and WRITE then no one else could 3633 * have OPENed the file, hence no conflicting READ 3634 * deny. This check is merely an optimization. 3635 */ 3636 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH) 3637 goto out; 3638 3639 /* Check against file struct's DENY mode */ 3640 fp = rfs4_findfile(vp, NULL, &create); 3641 if (fp != NULL) { 3642 int deny_read = 0; 3643 rfs4_dbe_lock(fp->rf_dbe); 3644 /* 3645 * Check if any other open owner has the file 3646 * OPENed with deny READ. 3647 */ 3648 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ) 3649 deny_read = 1; 3650 ASSERT(fp->rf_deny_read >= deny_read); 3651 if (fp->rf_deny_read > deny_read) 3652 stat = NFS4ERR_OPENMODE; 3653 rfs4_dbe_unlock(fp->rf_dbe); 3654 rfs4_file_rele(fp); 3655 } 3656 } 3657 } else { 3658 /* Illegal I/O mode */ 3659 stat = NFS4ERR_INVAL; 3660 } 3661 out: 3662 rfs4_dbe_unlock(sp->rs_dbe); 3663 return (stat); 3664 } 3665 3666 /* 3667 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether 3668 * the file is being truncated, return NFS4_OK if allowed or appropriate 3669 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on 3670 * the associated file will be done if the I/O is not consistent with any 3671 * delegation in effect on the file. Should be holding VOP_RWLOCK, either 3672 * as reader or writer as appropriate. rfs4_op_open will acquire the 3673 * VOP_RWLOCK as writer when setting up delegation. 
If the stateid is bad
 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
 * deleg parameter, we will return whether a write delegation is held by
 * the client associated with this stateid.
 * If the server instance associated with the relevant client is in its
 * grace period, return NFS4ERR_GRACE.
 */

nfsstat4
rfs4_check_stateid(int mode, vnode_t *vp,
    stateid4 *stateid, bool_t trunc, bool_t *deleg,
    bool_t do_access, caller_context_t *ct)
{
	stateid_t *id = (stateid_t *)stateid;
	rfs4_state_t *sp;
	rfs4_deleg_state_t *dsp;
	rfs4_lo_state_t *lsp;
	rfs4_file_t *fp;
	bool_t create = FALSE;
	nfsstat4 stat = NFS4_OK;

	/* Seed the caller context; a lock stateid refines it further down */
	if (ct != NULL) {
		ct->cc_sysid = 0;
		ct->cc_pid = 0;
		ct->cc_caller_id = nfs4_srv_caller_id;
		ct->cc_flags = CC_DONTBLOCK;
	}

	/*
	 * Special stateids carry no state of their own. The I/O is fine
	 * unless it conflicts with a delegation on the file: any WRITE, or
	 * any I/O at all while a write delegation is out, triggers a
	 * recall and the caller is told to retry.
	 */
	if (ISSPECIAL(stateid)) {
		fp = rfs4_findfile(vp, NULL, &create);
		if (fp == NULL)
			return (NFS4_OK);

		if (fp->rf_dinfo.rd_dtype != OPEN_DELEGATE_NONE &&
		    (mode == FWRITE ||
		    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)) {
			rfs4_recall_deleg(fp, trunc, NULL);
			stat = NFS4ERR_DELAY;
		}
		rfs4_file_rele(fp);

		return (stat);
	}

	stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
	if (stat != NFS4_OK)
		return (stat);

	/*
	 * Stateid provided was a "lock" stateid. The open state behind it
	 * came back in sp as well and is validated in the next section.
	 */
	if (lsp != NULL) {
		/* Is associated server instance in its grace period? */
		if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
			stat = NFS4ERR_GRACE;
		} else if (id->bits.type == LOCKID) {
			if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq) {
				/* Seqid in the future? - that's bad */
				stat = NFS4ERR_BAD_STATEID;
			} else if (lsp->rls_lockid.bits.chgseq >
			    id->bits.chgseq) {
				/* Seqid in the past? - that's old */
				stat = NFS4ERR_OLD_STATEID;
			} else if (lsp->rls_state->rs_finfo->rf_vp != vp) {
				/* Specified filehandle does not match */
				stat = NFS4ERR_BAD_STATEID;
			}
		}

		/* Only a lock stateid that passed identifies the locker */
		if (stat == NFS4_OK && ct != NULL) {
			ct->cc_sysid = lsp->rls_locker->rl_client->rc_sysidt;
			ct->cc_pid = lsp->rls_locker->rl_pid;
		}
		rfs4_lo_state_rele(lsp, FALSE);

		if (stat != NFS4_OK) {
			if (sp != NULL)
				rfs4_state_rele_nounlock(sp);
			return (stat);
		}
	}

	/* Stateid provided was an "open" stateid */
	if (sp != NULL) {
		if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
			/* Associated server instance is in its grace period */
			stat = NFS4ERR_GRACE;
		} else if (id->bits.type == OPENID &&
		    sp->rs_stateid.bits.chgseq < id->bits.chgseq) {
			/* Seqid in the future? - that's bad */
			stat = NFS4ERR_BAD_STATEID;
		} else if (id->bits.type == OPENID &&
		    sp->rs_stateid.bits.chgseq > id->bits.chgseq) {
			/* Seqid in the past - that's old */
			stat = NFS4ERR_OLD_STATEID;
		} else if (sp->rs_finfo->rf_vp != vp) {
			/* Specified filehandle does not match */
			stat = NFS4ERR_BAD_STATEID;
		} else if (sp->rs_owner->ro_need_confirm) {
			stat = NFS4ERR_BAD_STATEID;
		} else if (sp->rs_closed == TRUE) {
			stat = NFS4ERR_OLD_STATEID;
		}

		if (stat != NFS4_OK) {
			rfs4_state_rele_nounlock(sp);
			return (stat);
		}

		/*
		 * The stateid itself is valid from here on. A failed
		 * access check is still reported to the caller, but it
		 * does not stop the delegation report or the lease
		 * update below.
		 */
		stat = do_access ?
		    rfs4_state_has_access(sp, mode, vp) : NFS4_OK;

		/*
		 * Return whether this state has write
		 * delegation if desired
		 */
		if (deleg &&
		    sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE)
			*deleg = TRUE;

		/*
		 * We got a valid stateid, so we update the
		 * lease on the client. Ideally we would like
		 * to do this after the calling op succeeds,
		 * but for now this will be good
		 * enough. Callers of this routine are
		 * currently insulated from the state stuff.
		 */
		rfs4_update_lease(sp->rs_owner->ro_client);

		/*
		 * If a delegation is present on this file and
		 * this is a WRITE, then update the lastwrite
		 * time to indicate that activity is present.
		 */
		if (sp->rs_finfo->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE &&
		    mode == FWRITE) {
			sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
			    gethrestime_sec();
		}

		rfs4_state_rele_nounlock(sp);

		return (stat);
	}

	/* Stateid provided was a "delegation" stateid */
	if (dsp != NULL) {
		if (rfs4_clnt_in_grace(dsp->rds_client)) {
			/* Associated server instance is in its grace period */
			stat = NFS4ERR_GRACE;
		} else if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
			stat = NFS4ERR_BAD_STATEID;
		} else if (dsp->rds_finfo->rf_vp != vp) {
			/* Specified filehandle does not match */
			stat = NFS4ERR_BAD_STATEID;
		}

		if (stat == NFS4_OK) {
			/*
			 * Return whether this state has write
			 * delegation if desired
			 */
			if (deleg && dsp->rds_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE)
				*deleg = TRUE;

			rfs4_update_lease(dsp->rds_client);

			/*
			 * If a delegation is present on this file and
			 * this is a WRITE, then update the lastwrite
			 * time to indicate that activity is present.
			 */
			if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE && mode == FWRITE) {
				dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
				    gethrestime_sec();
			}

			/*
			 * XXX - what happens if this is a WRITE and the
			 * delegation type is for READ.
			 */
		}

		rfs4_deleg_state_rele(dsp);

		return (stat);
	}

	/*
	 * If we got this far, something bad happened
	 */
	return (NFS4ERR_BAD_STATEID);
}


/*
 * This is a special function in that for the file struct provided the
 * server wants to remove/close all current state associated with the
 * file. The prime use of this would be with OP_REMOVE to force the
 * release of state and particularly of file locks.
 *
 * There is an assumption that there are no delegations outstanding on
 * this file at this point. The caller should have waited for those
 * to be returned or revoked.
3901 */ 3902 void 3903 rfs4_close_all_state(rfs4_file_t *fp) 3904 { 3905 rfs4_state_t *sp; 3906 3907 rfs4_dbe_lock(fp->rf_dbe); 3908 3909 #ifdef DEBUG 3910 /* only applies when server is handing out delegations */ 3911 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE) 3912 ASSERT(fp->rf_dinfo.rd_hold_grant > 0); 3913 #endif 3914 3915 /* No delegations for this file */ 3916 ASSERT(list_is_empty(&fp->rf_delegstatelist)); 3917 3918 /* Make sure that it can not be found */ 3919 rfs4_dbe_invalidate(fp->rf_dbe); 3920 3921 if (fp->rf_vp == NULL) { 3922 rfs4_dbe_unlock(fp->rf_dbe); 3923 return; 3924 } 3925 rfs4_dbe_unlock(fp->rf_dbe); 3926 3927 /* 3928 * Hold as writer to prevent other server threads from 3929 * processing requests related to the file while all state is 3930 * being removed. 3931 */ 3932 rw_enter(&fp->rf_file_rwlock, RW_WRITER); 3933 3934 /* Remove ALL state from the file */ 3935 while (sp = rfs4_findstate_by_file(fp)) { 3936 rfs4_state_close(sp, FALSE, FALSE, CRED()); 3937 rfs4_state_rele_nounlock(sp); 3938 } 3939 3940 /* 3941 * This is only safe since there are no further references to 3942 * the file. 3943 */ 3944 rfs4_dbe_lock(fp->rf_dbe); 3945 if (fp->rf_vp) { 3946 vnode_t *vp = fp->rf_vp; 3947 3948 mutex_enter(&vp->v_vsd_lock); 3949 (void) vsd_set(vp, nfs4_srv_vkey, NULL); 3950 mutex_exit(&vp->v_vsd_lock); 3951 VN_RELE(vp); 3952 fp->rf_vp = NULL; 3953 } 3954 rfs4_dbe_unlock(fp->rf_dbe); 3955 3956 /* Finally let other references to proceed */ 3957 rw_exit(&fp->rf_file_rwlock); 3958 } 3959 3960 /* 3961 * This function is used as a target for the rfs4_dbe_walk() call 3962 * below. The purpose of this function is to see if the 3963 * lockowner_state refers to a file that resides within the exportinfo 3964 * export. If so, then remove the lock_owner state (file locks and 3965 * share "locks") for this object since the intent is the server is 3966 * unexporting the specified directory. 
Be sure to invalidate the
 * object after the state has been released
 */
static void
rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
{
	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
	struct exportinfo *exi = (struct exportinfo *)e;
	nfs_fh4_fmt_t exp_fmt, *exp_fh4, *file_fh4;
	fhandle_t *exp_fh;
	int in_export;

	/* Express the export's filehandle in NFSv4 filehandle format */
	exp_fh = (fhandle_t *)&exi->exi_fh;
	exp_fh4 = (nfs_fh4_fmt_t *)&exp_fmt;
	FH_TO_FMT4(exp_fh, exp_fh4);

	file_fh4 = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
	    rf_filehandle.nfs_fh4_val;

	/* Same fsid and same export xdata means the file is in the export */
	in_export = EQFSID(&file_fh4->fh4_fsid, &exp_fh4->fh4_fsid) &&
	    bcmp(&file_fh4->fh4_xdata, &exp_fh4->fh4_xdata,
	    exp_fh4->fh4_xlen) == 0;
	if (!in_export)
		return;

	/* Release the state first, then invalidate both entries */
	rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
	rfs4_dbe_invalidate(lsp->rls_dbe);
	rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
}

/*
 * This function is used as a target for the rfs4_dbe_walk() call
 * below. The purpose of this function is to see if the state refers
 * to a file that resides within the exportinfo export. If so, then
 * remove the open state for this object since the intent is the
 * server is unexporting the specified directory. The main result for
 * this type of entry is to invalidate it such that it will not be
 * found in the future.
4002 */ 4003 static void 4004 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e) 4005 { 4006 rfs4_state_t *sp = (rfs4_state_t *)u_entry; 4007 struct exportinfo *exi = (struct exportinfo *)e; 4008 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 4009 fhandle_t *efhp; 4010 4011 efhp = (fhandle_t *)&exi->exi_fh; 4012 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 4013 4014 FH_TO_FMT4(efhp, exi_fhp); 4015 4016 finfo_fhp = 4017 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val; 4018 4019 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 4020 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 4021 exi_fhp->fh4_xlen) == 0) { 4022 rfs4_state_close(sp, TRUE, FALSE, CRED()); 4023 rfs4_dbe_invalidate(sp->rs_dbe); 4024 } 4025 } 4026 4027 /* 4028 * This function is used as a target for the rfs4_dbe_walk() call 4029 * below. The purpose of this function is to see if the state refers 4030 * to a file that resides within the exportinfo export. If so, then 4031 * remove the deleg state for this object since the intent is the 4032 * server is unexporting the specified directory. The main result for 4033 * this type of entry is to invalidate it such it will not be found in 4034 * the future. 4035 */ 4036 static void 4037 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e) 4038 { 4039 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry; 4040 struct exportinfo *exi = (struct exportinfo *)e; 4041 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp; 4042 fhandle_t *efhp; 4043 4044 efhp = (fhandle_t *)&exi->exi_fh; 4045 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4; 4046 4047 FH_TO_FMT4(efhp, exi_fhp); 4048 4049 finfo_fhp = 4050 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val; 4051 4052 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) && 4053 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata, 4054 exi_fhp->fh4_xlen) == 0) { 4055 rfs4_dbe_invalidate(dsp->rds_dbe); 4056 } 4057 } 4058 4059 /* 4060 * This function is used as a target for the rfs4_dbe_walk() call 4061 * below. 
The purpose of this function is to see if the state refers
 * to a file that resides within the exportinfo export. If so, then
 * release vnode hold for this object since the intent is the server
 * is unexporting the specified directory. Invalidation will prevent
 * this struct from being found in the future.
 */
static void
rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
{
	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
	struct exportinfo *exi = (struct exportinfo *)e;
	nfs_fh4_fmt_t exp_fmt, *exp_fh4, *file_fh4;
	fhandle_t *exp_fh;
	vnode_t *vp;
	int in_export;

	/* Express the export's filehandle in NFSv4 filehandle format */
	exp_fh = (fhandle_t *)&exi->exi_fh;
	exp_fh4 = (nfs_fh4_fmt_t *)&exp_fmt;
	FH_TO_FMT4(exp_fh, exp_fh4);

	file_fh4 = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;

	/* Same fsid and same export xdata means the file is in the export */
	in_export = EQFSID(&file_fh4->fh4_fsid, &exp_fh4->fh4_fsid) &&
	    bcmp(&file_fh4->fh4_xdata, &exp_fh4->fh4_xdata,
	    exp_fh4->fh4_xlen) == 0;
	if (!in_export)
		return;

	vp = fp->rf_vp;
	if (vp != NULL) {
		/*
		 * don't leak monitors and remove the reference
		 * put on the vnode when the delegation was granted.
		 */
		switch (fp->rf_dinfo.rd_dtype) {
		case OPEN_DELEGATE_READ:
			(void) fem_uninstall(vp, deleg_rdops, (void *)fp);
			vn_open_downgrade(vp, FREAD);
			break;
		case OPEN_DELEGATE_WRITE:
			(void) fem_uninstall(vp, deleg_wrops, (void *)fp);
			vn_open_downgrade(vp, FREAD|FWRITE);
			break;
		default:
			/* any other delegation type: nothing to undo */
			break;
		}

		/* Clear the vnode's nfs4_srv_vkey VSD slot, then drop it */
		mutex_enter(&vp->v_vsd_lock);
		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
		mutex_exit(&vp->v_vsd_lock);
		VN_RELE(vp);
		fp->rf_vp = NULL;
	}

	rfs4_dbe_invalidate(fp->rf_dbe);
}

/*
 * Given a directory that is being unexported, cleanup/release all
 * state in the server that refers to objects residing underneath this
 * particular export. The ordering of the release is important.
 * Lock_owner, then state and then file.
4117 * 4118 * NFS zones note: nfs_export.c:unexport() calls this from a 4119 * thread in the global zone for NGZ data structures, so we 4120 * CANNOT use zone_getspecific anywhere in this code path. 4121 */ 4122 void 4123 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi) 4124 { 4125 nfs_globals_t *ng; 4126 nfs4_srv_t *nsrv4; 4127 4128 ng = ne->ne_globals; 4129 ASSERT(ng->nfs_zoneid == exi->exi_zoneid); 4130 nsrv4 = ng->nfs4_srv; 4131 4132 mutex_enter(&nsrv4->state_lock); 4133 4134 if (nsrv4->nfs4_server_state == NULL) { 4135 mutex_exit(&nsrv4->state_lock); 4136 return; 4137 } 4138 4139 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab, 4140 rfs4_lo_state_walk_callout, exi); 4141 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi); 4142 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab, 4143 rfs4_deleg_state_walk_callout, exi); 4144 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi); 4145 4146 mutex_exit(&nsrv4->state_lock); 4147 } 4148