xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_state.c (revision 8361acf58a302751348aac091ab09484f3ecfb8c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2018 Nexenta Systems, Inc.
28  * Copyright 2019 Nexenta by DDN, Inc.
29  * Copyright 2023 MNX Cloud, Inc.
30  */
31 
32 #include <sys/systm.h>
33 #include <sys/kmem.h>
34 #include <sys/cmn_err.h>
35 #include <sys/atomic.h>
36 #include <sys/clconf.h>
37 #include <sys/cladm.h>
38 #include <sys/flock.h>
39 #include <nfs/export.h>
40 #include <nfs/nfs.h>
41 #include <nfs/nfs4.h>
42 #include <nfs/nfssys.h>
43 #include <nfs/lm.h>
44 #include <sys/pathname.h>
45 #include <sys/sdt.h>
46 #include <sys/nvpair.h>
47 
48 extern u_longlong_t nfs4_srv_caller_id;
49 
50 extern uint_t nfs4_srv_vkey;
51 
52 stateid4 special0 = {
53 	0,
54 	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
55 };
56 
57 stateid4 special1 = {
58 	0xffffffff,
59 	{
60 		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
61 		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
62 		(char)0xff, (char)0xff, (char)0xff, (char)0xff
63 	}
64 };
65 
66 
67 #define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
68 			stateid4_cmp(id, &special1))
69 
70 /* For embedding the cluster nodeid into our clientid */
71 #define	CLUSTER_NODEID_SHIFT	24
72 #define	CLUSTER_MAX_NODEID	255
73 
74 #ifdef DEBUG
75 int rfs4_debug;
76 #endif
77 
78 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
79 static uint32_t rfs4_database_debug = 0x00;
80 
81 /* CSTYLED */
82 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
83 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
84 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
85 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
86 
87 /*
88  * Couple of simple init/destroy functions for a general waiter
89  */
90 void
91 rfs4_sw_init(rfs4_state_wait_t *swp)
92 {
93 	mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
94 	cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
95 	swp->sw_active = FALSE;
96 	swp->sw_wait_count = 0;
97 }
98 
99 void
100 rfs4_sw_destroy(rfs4_state_wait_t *swp)
101 {
102 	mutex_destroy(swp->sw_cv_lock);
103 	cv_destroy(swp->sw_cv);
104 }
105 
106 void
107 rfs4_sw_enter(rfs4_state_wait_t *swp)
108 {
109 	mutex_enter(swp->sw_cv_lock);
110 	while (swp->sw_active) {
111 		swp->sw_wait_count++;
112 		cv_wait(swp->sw_cv, swp->sw_cv_lock);
113 		swp->sw_wait_count--;
114 	}
115 	ASSERT(swp->sw_active == FALSE);
116 	swp->sw_active = TRUE;
117 	mutex_exit(swp->sw_cv_lock);
118 }
119 
120 void
121 rfs4_sw_exit(rfs4_state_wait_t *swp)
122 {
123 	mutex_enter(swp->sw_cv_lock);
124 	ASSERT(swp->sw_active == TRUE);
125 	swp->sw_active = FALSE;
126 	if (swp->sw_wait_count != 0)
127 		cv_broadcast(swp->sw_cv);
128 	mutex_exit(swp->sw_cv_lock);
129 }
130 
131 static void
132 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
133 {
134 	lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
135 	lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
136 
137 	if (sres->status == NFS4ERR_DENIED) {
138 		dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
139 		bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
140 	}
141 }
142 
143 /*
144  * CPR callback id -- not related to v4 callbacks
145  */
146 static callb_id_t cpr_id = 0;
147 
148 static void
149 deep_lock_free(LOCK4res *res)
150 {
151 	lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
152 
153 	if (res->status == NFS4ERR_DENIED)
154 		kmem_free(lo->owner_val, lo->owner_len);
155 }
156 
157 static void
158 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
159 {
160 	nfsace4 *sacep, *dacep;
161 
162 	if (sres->status != NFS4_OK) {
163 		return;
164 	}
165 
166 	dres->attrset = sres->attrset;
167 
168 	switch (sres->delegation.delegation_type) {
169 	case OPEN_DELEGATE_NONE:
170 		return;
171 	case OPEN_DELEGATE_READ:
172 		sacep = &sres->delegation.open_delegation4_u.read.permissions;
173 		dacep = &dres->delegation.open_delegation4_u.read.permissions;
174 		break;
175 	case OPEN_DELEGATE_WRITE:
176 		sacep = &sres->delegation.open_delegation4_u.write.permissions;
177 		dacep = &dres->delegation.open_delegation4_u.write.permissions;
178 		break;
179 	}
180 	dacep->who.utf8string_val =
181 	    kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
182 	bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
183 	    sacep->who.utf8string_len);
184 }
185 
186 static void
187 deep_open_free(OPEN4res *res)
188 {
189 	nfsace4 *acep;
190 	if (res->status != NFS4_OK)
191 		return;
192 
193 	switch (res->delegation.delegation_type) {
194 	case OPEN_DELEGATE_NONE:
195 		return;
196 	case OPEN_DELEGATE_READ:
197 		acep = &res->delegation.open_delegation4_u.read.permissions;
198 		break;
199 	case OPEN_DELEGATE_WRITE:
200 		acep = &res->delegation.open_delegation4_u.write.permissions;
201 		break;
202 	}
203 
204 	if (acep->who.utf8string_val) {
205 		kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
206 		acep->who.utf8string_val = NULL;
207 	}
208 }
209 
210 void
211 rfs4_free_reply(nfs_resop4 *rp)
212 {
213 	switch (rp->resop) {
214 	case OP_LOCK:
215 		deep_lock_free(&rp->nfs_resop4_u.oplock);
216 		break;
217 	case OP_OPEN:
218 		deep_open_free(&rp->nfs_resop4_u.opopen);
219 	default:
220 		break;
221 	}
222 }
223 
224 void
225 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
226 {
227 	*dst = *src;
228 
229 	/* Handle responses that need deep copy */
230 	switch (src->resop) {
231 	case OP_LOCK:
232 		deep_lock_copy(&dst->nfs_resop4_u.oplock,
233 		    &src->nfs_resop4_u.oplock);
234 		break;
235 	case OP_OPEN:
236 		deep_open_copy(&dst->nfs_resop4_u.opopen,
237 		    &src->nfs_resop4_u.opopen);
238 		break;
239 	default:
240 		break;
241 	};
242 }
243 
244 /*
245  * This is the implementation of the underlying state engine. The
246  * public interface to this engine is described by
247  * nfs4_state.h. Callers to the engine should hold no state engine
248  * locks when they call in to it. If the protocol needs to lock data
249  * structures it should do so after acquiring all references to them
250  * first and then follow the following lock order:
251  *
252  *	client > openowner > state > lo_state > lockowner > file.
253  *
254  * Internally we only allow a thread to hold one hash bucket lock at a
255  * time and the lock is higher in the lock order (must be acquired
256  * first) than the data structure that is on that hash list.
257  *
258  * If a new reference was acquired by the caller, that reference needs
259  * to be released after releasing all acquired locks with the
260  * corresponding rfs4_*_rele routine.
261  */
262 
263 /*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
268  * correctly (I hope) and will not change by much.  The mutexes for
269  * the hash buckets that can be used to create new instances of data
270  * structures  might be good candidates to evolve into reader writer
271  * locks. If it has to do a creation, it would be holding the
272  * mutex across a kmem_alloc with KM_SLEEP specified.
273  */
274 
275 #ifdef DEBUG
276 #define	TABSIZE 17
277 #else
278 #define	TABSIZE 2047
279 #endif
280 
281 #define	ADDRHASH(key) ((unsigned long)(key) >> 3)
282 
/* Parenthesized so the value stays intact inside larger expressions. */
#define	MAXTABSZ (1024 * 1024)
284 
285 /* The values below are rfs4_lease_time units */
286 
287 #ifdef DEBUG
288 #define	CLIENT_CACHE_TIME 1
289 #define	OPENOWNER_CACHE_TIME 1
290 #define	STATE_CACHE_TIME 1
291 #define	LO_STATE_CACHE_TIME 1
292 #define	LOCKOWNER_CACHE_TIME 1
293 #define	FILE_CACHE_TIME 3
294 #define	DELEG_STATE_CACHE_TIME 1
295 #else
296 #define	CLIENT_CACHE_TIME 10
297 #define	OPENOWNER_CACHE_TIME 5
298 #define	STATE_CACHE_TIME 1
299 #define	LO_STATE_CACHE_TIME 1
300 #define	LOCKOWNER_CACHE_TIME 3
301 #define	FILE_CACHE_TIME 40
302 #define	DELEG_STATE_CACHE_TIME 1
303 #endif
304 
305 /*
306  * NFSv4 server state databases
307  *
 * Initialized when the module is loaded and used by NFSv4 state tables.
309  * These kmem_cache databases are global, the tables that make use of these
310  * are per zone.
311  */
312 kmem_cache_t *rfs4_client_mem_cache;
313 kmem_cache_t *rfs4_clntIP_mem_cache;
314 kmem_cache_t *rfs4_openown_mem_cache;
315 kmem_cache_t *rfs4_openstID_mem_cache;
316 kmem_cache_t *rfs4_lockstID_mem_cache;
317 kmem_cache_t *rfs4_lockown_mem_cache;
318 kmem_cache_t *rfs4_file_mem_cache;
319 kmem_cache_t *rfs4_delegstID_mem_cache;
320 
321 /*
322  * NFSv4 state table functions
323  */
324 static bool_t rfs4_client_create(rfs4_entry_t, void *);
325 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
326 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
327 static void rfs4_client_destroy(rfs4_entry_t);
328 static bool_t rfs4_client_expiry(rfs4_entry_t);
329 static uint32_t clientid_hash(void *);
330 static bool_t clientid_compare(rfs4_entry_t, void *);
331 static void *clientid_mkkey(rfs4_entry_t);
332 static uint32_t nfsclnt_hash(void *);
333 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
334 static void *nfsclnt_mkkey(rfs4_entry_t);
335 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
336 static void rfs4_clntip_destroy(rfs4_entry_t);
337 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
338 static uint32_t clntip_hash(void *);
339 static bool_t clntip_compare(rfs4_entry_t, void *);
340 static void *clntip_mkkey(rfs4_entry_t);
341 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
342 static void rfs4_openowner_destroy(rfs4_entry_t);
343 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
344 static uint32_t openowner_hash(void *);
345 static bool_t openowner_compare(rfs4_entry_t, void *);
346 static void *openowner_mkkey(rfs4_entry_t);
347 static bool_t rfs4_state_create(rfs4_entry_t, void *);
348 static void rfs4_state_destroy(rfs4_entry_t);
349 static bool_t rfs4_state_expiry(rfs4_entry_t);
350 static uint32_t state_hash(void *);
351 static bool_t state_compare(rfs4_entry_t, void *);
352 static void *state_mkkey(rfs4_entry_t);
353 static uint32_t state_owner_file_hash(void *);
354 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
355 static void *state_owner_file_mkkey(rfs4_entry_t);
356 static uint32_t state_file_hash(void *);
357 static bool_t state_file_compare(rfs4_entry_t, void *);
358 static void *state_file_mkkey(rfs4_entry_t);
359 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
360 static void rfs4_lo_state_destroy(rfs4_entry_t);
361 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
362 static uint32_t lo_state_hash(void *);
363 static bool_t lo_state_compare(rfs4_entry_t, void *);
364 static void *lo_state_mkkey(rfs4_entry_t);
365 static uint32_t lo_state_lo_hash(void *);
366 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
367 static void *lo_state_lo_mkkey(rfs4_entry_t);
368 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
369 static void rfs4_lockowner_destroy(rfs4_entry_t);
370 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
371 static uint32_t lockowner_hash(void *);
372 static bool_t lockowner_compare(rfs4_entry_t, void *);
373 static void *lockowner_mkkey(rfs4_entry_t);
374 static uint32_t pid_hash(void *);
375 static bool_t pid_compare(rfs4_entry_t, void *);
376 static void *pid_mkkey(rfs4_entry_t);
377 static bool_t rfs4_file_create(rfs4_entry_t, void *);
378 static void rfs4_file_destroy(rfs4_entry_t);
379 static uint32_t file_hash(void *);
380 static bool_t file_compare(rfs4_entry_t, void *);
381 static void *file_mkkey(rfs4_entry_t);
382 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
383 static void rfs4_deleg_state_destroy(rfs4_entry_t);
384 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
385 static uint32_t deleg_hash(void *);
386 static bool_t deleg_compare(rfs4_entry_t, void *);
387 static void *deleg_mkkey(rfs4_entry_t);
388 static uint32_t deleg_state_hash(void *);
389 static bool_t deleg_state_compare(rfs4_entry_t, void *);
390 static void *deleg_state_mkkey(rfs4_entry_t);
391 
392 static void rfs4_state_rele_nounlock(rfs4_state_t *);
393 
394 static int rfs4_ss_enabled = 0;
395 
396 void
397 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
398 {
399 	kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
400 }
401 
402 static rfs4_ss_pn_t *
403 rfs4_ss_pnalloc(char *dir, char *leaf)
404 {
405 	rfs4_ss_pn_t *ss_pn;
406 	int dir_len, leaf_len;
407 
408 	/*
409 	 * validate we have a resonable path
410 	 * (account for the '/' and trailing null)
411 	 */
412 	if ((dir_len = strlen(dir)) > MAXPATHLEN ||
413 	    (leaf_len = strlen(leaf)) > MAXNAMELEN ||
414 	    (dir_len + leaf_len + 2) > MAXPATHLEN) {
415 		return (NULL);
416 	}
417 
418 	ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
419 
420 	(void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
421 	/* Handy pointer to just the leaf name */
422 	ss_pn->leaf = ss_pn->pn + dir_len + 1;
423 	return (ss_pn);
424 }
425 
426 
427 /*
428  * Move the "leaf" filename from "sdir" directory
429  * to the "ddir" directory. Return the pathname of
430  * the destination unless the rename fails in which
431  * case we need to return the source pathname.
432  */
433 static rfs4_ss_pn_t *
434 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
435 {
436 	rfs4_ss_pn_t *src, *dst;
437 
438 	if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
439 		return (NULL);
440 
441 	if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
442 		rfs4_ss_pnfree(src);
443 		return (NULL);
444 	}
445 
446 	/*
447 	 * If the rename fails we shall return the src
448 	 * pathname and free the dst. Otherwise we need
449 	 * to free the src and return the dst pathanme.
450 	 */
451 	if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
452 		rfs4_ss_pnfree(dst);
453 		return (src);
454 	}
455 	rfs4_ss_pnfree(src);
456 	return (dst);
457 }
458 
459 
460 static rfs4_oldstate_t *
461 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
462 {
463 	struct uio uio;
464 	struct iovec iov[3];
465 
466 	rfs4_oldstate_t *cl_ss = NULL;
467 	vnode_t *vp;
468 	vattr_t va;
469 	uint_t id_len;
470 	int err, kill_file, file_vers;
471 
472 	if (ss_pn == NULL)
473 		return (NULL);
474 
475 	/*
476 	 * open the state file.
477 	 */
478 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
479 		return (NULL);
480 	}
481 
482 	if (vp->v_type != VREG) {
483 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
484 		VN_RELE(vp);
485 		return (NULL);
486 	}
487 
488 	err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
489 	if (err) {
490 		/*
491 		 * We don't have read access? better get the heck out.
492 		 */
493 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
494 		VN_RELE(vp);
495 		return (NULL);
496 	}
497 
498 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
499 	/*
500 	 * get the file size to do some basic validation
501 	 */
502 	va.va_mask = AT_SIZE;
503 	err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
504 
505 	kill_file = (va.va_size == 0 || va.va_size <
506 	    (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
507 
508 	if (err || kill_file) {
509 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
510 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
511 		VN_RELE(vp);
512 		if (kill_file) {
513 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
514 		}
515 		return (NULL);
516 	}
517 
518 	cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
519 
520 	/*
521 	 * build iovecs to read in the file_version, verifier and id_len
522 	 */
523 	iov[0].iov_base = (caddr_t)&file_vers;
524 	iov[0].iov_len = sizeof (int);
525 	iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
526 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
527 	iov[2].iov_base = (caddr_t)&id_len;
528 	iov[2].iov_len = sizeof (uint_t);
529 
530 	uio.uio_iov = iov;
531 	uio.uio_iovcnt = 3;
532 	uio.uio_segflg = UIO_SYSSPACE;
533 	uio.uio_loffset = 0;
534 	uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
535 
536 	err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
537 	if (err != 0) {
538 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
539 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
540 		VN_RELE(vp);
541 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
542 		return (NULL);
543 	}
544 
545 	/*
546 	 * if the file_version doesn't match or if the
547 	 * id_len is zero or the combination of the verifier,
548 	 * id_len and id_val is bigger than the file we have
549 	 * a problem. If so ditch the file.
550 	 */
551 	kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
552 	    (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
553 
554 	if (err || kill_file) {
555 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
556 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
557 		VN_RELE(vp);
558 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
559 		if (kill_file) {
560 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
561 		}
562 		return (NULL);
563 	}
564 
565 	/*
566 	 * now get the client id value
567 	 */
568 	cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
569 	iov[0].iov_base = cl_ss->cl_id4.id_val;
570 	iov[0].iov_len = id_len;
571 
572 	uio.uio_iov = iov;
573 	uio.uio_iovcnt = 1;
574 	uio.uio_segflg = UIO_SYSSPACE;
575 	uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
576 
577 	err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
578 	if (err != 0) {
579 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
580 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
581 		VN_RELE(vp);
582 		kmem_free(cl_ss->cl_id4.id_val, id_len);
583 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
584 		return (NULL);
585 	}
586 
587 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
588 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
589 	VN_RELE(vp);
590 	return (cl_ss);
591 }
592 
593 #ifdef	nextdp
594 #undef nextdp
595 #endif
596 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
597 
598 /*
599  * Add entries from statedir to supplied oldstate list.
600  * Optionally, move all entries from statedir -> destdir.
601  */
602 void
603 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
604 {
605 	rfs4_ss_pn_t *ss_pn;
606 	rfs4_oldstate_t *cl_ss = NULL;
607 	char	*dirt = NULL;
608 	int	err, dir_eof = 0, size = 0;
609 	vnode_t *dvp;
610 	struct iovec iov;
611 	struct uio uio;
612 	struct dirent64 *dep;
613 	offset_t dirchunk_offset = 0;
614 
615 	/*
616 	 * open the state directory
617 	 */
618 	if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
619 		return;
620 
621 	if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
622 		goto out;
623 
624 	dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
625 
626 	/*
627 	 * Get and process the directory entries
628 	 */
629 	while (!dir_eof) {
630 		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
631 		iov.iov_base = dirt;
632 		iov.iov_len = RFS4_SS_DIRSIZE;
633 		uio.uio_iov = &iov;
634 		uio.uio_iovcnt = 1;
635 		uio.uio_segflg = UIO_SYSSPACE;
636 		uio.uio_loffset = dirchunk_offset;
637 		uio.uio_resid = RFS4_SS_DIRSIZE;
638 
639 		err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
640 		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
641 		if (err)
642 			goto out;
643 
644 		size = RFS4_SS_DIRSIZE - uio.uio_resid;
645 
646 		/*
647 		 * Process all the directory entries in this
648 		 * readdir chunk
649 		 */
650 		for (dep = (struct dirent64 *)dirt; size > 0;
651 		    dep = nextdp(dep)) {
652 
653 			size -= dep->d_reclen;
654 			dirchunk_offset = dep->d_off;
655 
656 			/*
657 			 * Skip '.' and '..'
658 			 */
659 			if (NFS_IS_DOTNAME(dep->d_name))
660 				continue;
661 
662 			ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
663 			if (ss_pn == NULL)
664 				continue;
665 
666 			cl_ss = rfs4_ss_getstate(dvp, ss_pn);
667 			if (cl_ss != NULL) {
668 				if (destdir != NULL) {
669 					rfs4_ss_pnfree(ss_pn);
670 					cl_ss->ss_pn = rfs4_ss_movestate(
671 					    statedir, destdir, dep->d_name);
672 				} else {
673 					cl_ss->ss_pn = ss_pn;
674 				}
675 				insque(cl_ss, oldstate);
676 			} else {
677 				rfs4_ss_pnfree(ss_pn);
678 			}
679 		}
680 	}
681 
682 out:
683 	(void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
684 	VN_RELE(dvp);
685 	if (dirt)
686 		kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
687 }
688 
689 static void
690 rfs4_ss_init(nfs4_srv_t *nsrv4)
691 {
692 	int npaths = 1;
693 	char *default_dss_path = NFS4_DSS_VAR_DIR;
694 
695 	/* read the default stable storage state */
696 	rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
697 
698 	rfs4_ss_enabled = 1;
699 }
700 
701 static void
702 rfs4_ss_fini(nfs4_srv_t *nsrv4)
703 {
704 	rfs4_servinst_t *sip;
705 
706 	mutex_enter(&nsrv4->servinst_lock);
707 	sip = nsrv4->nfs4_cur_servinst;
708 	while (sip != NULL) {
709 		rfs4_dss_clear_oldstate(sip);
710 		sip = sip->next;
711 	}
712 	mutex_exit(&nsrv4->servinst_lock);
713 }
714 
715 /*
716  * Remove all oldstate files referenced by this servinst.
717  */
718 static void
719 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
720 {
721 	rfs4_oldstate_t *os_head, *osp;
722 
723 	rw_enter(&sip->oldstate_lock, RW_WRITER);
724 	os_head = sip->oldstate;
725 
726 	if (os_head == NULL) {
727 		rw_exit(&sip->oldstate_lock);
728 		return;
729 	}
730 
731 	/* skip dummy entry */
732 	osp = os_head->next;
733 	while (osp != os_head) {
734 		char *leaf = osp->ss_pn->leaf;
735 		rfs4_oldstate_t *os_next;
736 
737 		rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
738 
739 		if (osp->cl_id4.id_val)
740 			kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
741 		rfs4_ss_pnfree(osp->ss_pn);
742 
743 		os_next = osp->next;
744 		remque(osp);
745 		kmem_free(osp, sizeof (rfs4_oldstate_t));
746 		osp = os_next;
747 	}
748 
749 	rw_exit(&sip->oldstate_lock);
750 }
751 
752 /*
753  * Form the state and oldstate paths, and read in the stable storage files.
754  */
755 void
756 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
757 {
758 	int i;
759 	char *state, *oldstate;
760 
761 	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
762 	oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
763 
764 	for (i = 0; i < npaths; i++) {
765 		char *path = paths[i];
766 
767 		(void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
768 		(void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
769 
770 		/*
771 		 * Populate the current server instance's oldstate list.
772 		 *
773 		 * 1. Read stable storage data from old state directory,
774 		 *    leaving its contents alone.
775 		 *
776 		 * 2. Read stable storage data from state directory,
777 		 *    and move the latter's contents to old state
778 		 *    directory.
779 		 */
780 		/* CSTYLED */
781 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
782 		/* CSTYLED */
783 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
784 	}
785 
786 	kmem_free(state, MAXPATHLEN);
787 	kmem_free(oldstate, MAXPATHLEN);
788 }
789 
790 
791 /*
792  * Check if we are still in grace and if the client can be
793  * granted permission to perform reclaims.
794  */
795 void
796 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
797 {
798 	rfs4_servinst_t *sip;
799 
800 	/*
801 	 * It should be sufficient to check the oldstate data for just
802 	 * this client's instance. However, since our per-instance
803 	 * client grouping is solely temporal, HA-NFSv4 RG failover
804 	 * might result in clients of the same RG being partitioned into
805 	 * separate instances.
806 	 *
807 	 * Until the client grouping is improved, we must check the
808 	 * oldstate data for all instances with an active grace period.
809 	 *
810 	 * This also serves as the mechanism to remove stale oldstate data.
811 	 * The first time we check an instance after its grace period has
812 	 * expired, the oldstate data should be cleared.
813 	 *
814 	 * Start at the current instance, and walk the list backwards
815 	 * to the first.
816 	 */
817 	mutex_enter(&nsrv4->servinst_lock);
818 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
819 		rfs4_ss_chkclid_sip(cp, sip);
820 
821 		/* if the above check found this client, we're done */
822 		if (cp->rc_can_reclaim)
823 			break;
824 	}
825 	mutex_exit(&nsrv4->servinst_lock);
826 }
827 
828 static void
829 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
830 {
831 	rfs4_oldstate_t *osp, *os_head;
832 
833 	/* short circuit everything if this server instance has no oldstate */
834 	rw_enter(&sip->oldstate_lock, RW_READER);
835 	os_head = sip->oldstate;
836 	rw_exit(&sip->oldstate_lock);
837 	if (os_head == NULL)
838 		return;
839 
840 	/*
841 	 * If this server instance is no longer in a grace period then
842 	 * the client won't be able to reclaim. No further need for this
843 	 * instance's oldstate data, so it can be cleared.
844 	 */
845 	if (!rfs4_servinst_in_grace(sip))
846 		return;
847 
848 	/* this instance is still in grace; search for the clientid */
849 
850 	rw_enter(&sip->oldstate_lock, RW_READER);
851 
852 	os_head = sip->oldstate;
853 	/* skip dummy entry */
854 	osp = os_head->next;
855 	while (osp != os_head) {
856 		if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
857 			if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
858 			    osp->cl_id4.id_len) == 0) {
859 				cp->rc_can_reclaim = 1;
860 				break;
861 			}
862 		}
863 		osp = osp->next;
864 	}
865 
866 	rw_exit(&sip->oldstate_lock);
867 }
868 
869 /*
870  * Place client information into stable storage: 1/3.
871  * First, generate the leaf filename, from the client's IP address and
872  * the server-generated short-hand clientid.
873  */
874 void
875 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
876 {
877 	const char *kinet_ntop6(uchar_t *, char *, size_t);
878 	char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
879 	struct sockaddr *ca;
880 	uchar_t *b;
881 
882 	if (rfs4_ss_enabled == 0) {
883 		return;
884 	}
885 
886 	buf[0] = 0;
887 
888 	ca = (struct sockaddr *)&cp->rc_addr;
889 
890 	/*
891 	 * Convert the caller's IP address to a dotted string
892 	 */
893 	if (ca->sa_family == AF_INET) {
894 		b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
895 		(void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
896 		    b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
897 	} else if (ca->sa_family == AF_INET6) {
898 		struct sockaddr_in6 *sin6;
899 
900 		sin6 = (struct sockaddr_in6 *)ca;
901 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
902 		    buf, INET6_ADDRSTRLEN);
903 	}
904 
905 	(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
906 	    (longlong_t)cp->rc_clientid);
907 	rfs4_ss_clid_write(nsrv4, cp, leaf);
908 }
909 
910 /*
911  * Place client information into stable storage: 2/3.
912  * DSS: distributed stable storage: the file may need to be written to
913  * multiple directories.
914  */
915 static void
916 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
917 {
918 	rfs4_servinst_t *sip;
919 
920 	/*
921 	 * It should be sufficient to write the leaf file to (all) DSS paths
922 	 * associated with just this client's instance. However, since our
923 	 * per-instance client grouping is solely temporal, HA-NFSv4 RG
924 	 * failover might result in us losing DSS data.
925 	 *
926 	 * Until the client grouping is improved, we must write the DSS data
927 	 * to all instances' paths. Start at the current instance, and
928 	 * walk the list backwards to the first.
929 	 */
930 	mutex_enter(&nsrv4->servinst_lock);
931 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
932 		int i, npaths = sip->dss_npaths;
933 
934 		/* write the leaf file to all DSS paths */
935 		for (i = 0; i < npaths; i++) {
936 			rfs4_dss_path_t *dss_path = sip->dss_paths[i];
937 
938 			/* HA-NFSv4 path might have been failed-away from us */
939 			if (dss_path == NULL)
940 				continue;
941 
942 			rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
943 		}
944 	}
945 	mutex_exit(&nsrv4->servinst_lock);
946 }
947 
948 /*
949  * Place client information into stable storage: 3/3.
950  * Write the stable storage data to the requested file.
951  */
952 static void
953 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
954 {
955 	int ioflag;
956 	int file_vers = NFS4_SS_VERSION;
957 	size_t dirlen;
958 	struct uio uio;
959 	struct iovec iov[4];
960 	char *dir;
961 	rfs4_ss_pn_t *ss_pn;
962 	vnode_t *vp;
963 	nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
964 
965 	/* allow 2 extra bytes for '/' & NUL */
966 	dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
967 	dir = kmem_alloc(dirlen, KM_SLEEP);
968 	(void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
969 
970 	ss_pn = rfs4_ss_pnalloc(dir, leaf);
971 	/* rfs4_ss_pnalloc takes its own copy */
972 	kmem_free(dir, dirlen);
973 	if (ss_pn == NULL)
974 		return;
975 
976 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
977 	    CRCREAT, 0)) {
978 		rfs4_ss_pnfree(ss_pn);
979 		return;
980 	}
981 
982 	/*
983 	 * We need to record leaf - i.e. the filename - so that we know
984 	 * what to remove, in the future. However, the dir part of cp->ss_pn
985 	 * should never be referenced directly, since it's potentially only
986 	 * one of several paths with this leaf in it.
987 	 */
988 	if (cp->rc_ss_pn != NULL) {
989 		if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
990 			/* we've already recorded *this* leaf */
991 			rfs4_ss_pnfree(ss_pn);
992 		} else {
993 			/* replace with this leaf */
994 			rfs4_ss_pnfree(cp->rc_ss_pn);
995 			cp->rc_ss_pn = ss_pn;
996 		}
997 	} else {
998 		cp->rc_ss_pn = ss_pn;
999 	}
1000 
1001 	/*
1002 	 * Build a scatter list that points to the nfs_client_id4
1003 	 */
1004 	iov[0].iov_base = (caddr_t)&file_vers;
1005 	iov[0].iov_len = sizeof (int);
1006 	iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1007 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
1008 	iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1009 	iov[2].iov_len = sizeof (uint_t);
1010 	iov[3].iov_base = (caddr_t)cl_id4->id_val;
1011 	iov[3].iov_len = cl_id4->id_len;
1012 
1013 	uio.uio_iov = iov;
1014 	uio.uio_iovcnt = 4;
1015 	uio.uio_loffset = 0;
1016 	uio.uio_segflg = UIO_SYSSPACE;
1017 	uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1018 	uio.uio_resid = cl_id4->id_len + sizeof (int) +
1019 	    NFS4_VERIFIER_SIZE + sizeof (uint_t);
1020 
1021 	ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1022 	uio.uio_extflg = UIO_COPY_DEFAULT;
1023 
1024 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1025 	/* write the full client id to the file. */
1026 	(void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1027 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1028 
1029 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1030 	VN_RELE(vp);
1031 }
1032 
1033 /*
1034  * DSS: distributed stable storage.
1035  * Unpack the list of paths passed by nfsd.
1036  * Use nvlist_alloc(9F) to manage the data.
1037  * The caller is responsible for allocating and freeing the buffer.
1038  */
1039 int
1040 rfs4_dss_setpaths(char *buf, size_t buflen)
1041 {
1042 	int error;
1043 
1044 	/*
1045 	 * If this is a "warm start", i.e. we previously had DSS paths,
1046 	 * preserve the old paths.
1047 	 */
1048 	if (rfs4_dss_paths != NULL) {
1049 		/*
1050 		 * Before we lose the ptr, destroy the nvlist and pathnames
1051 		 * array from the warm start before this one.
1052 		 */
1053 		nvlist_free(rfs4_dss_oldpaths);
1054 		rfs4_dss_oldpaths = rfs4_dss_paths;
1055 	}
1056 
1057 	/* unpack the buffer into a searchable nvlist */
1058 	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1059 	if (error)
1060 		return (error);
1061 
1062 	/*
1063 	 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1064 	 * in the list, and record its location.
1065 	 */
1066 	error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1067 	    &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1068 	return (error);
1069 }
1070 
1071 /*
1072  * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1073  * to find and mark the client for forced expire.
1074  */
1075 static void
1076 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1077 {
1078 	rfs4_client_t *cp = (rfs4_client_t *)ent;
1079 	struct nfs4clrst_args *clr = arg;
1080 	struct sockaddr_in6 *ent_sin6;
1081 	struct in6_addr  clr_in6;
1082 	struct sockaddr_in  *ent_sin;
1083 	struct in_addr   clr_in;
1084 
1085 	if (clr->addr_type != cp->rc_addr.ss_family) {
1086 		return;
1087 	}
1088 
1089 	switch (clr->addr_type) {
1090 
1091 	case AF_INET6:
1092 		/* copyin the address from user space */
1093 		if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1094 			break;
1095 		}
1096 
1097 		ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1098 
1099 		/*
1100 		 * now compare, and if equivalent mark entry
1101 		 * for forced expiration
1102 		 */
1103 		if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1104 			cp->rc_forced_expire = 1;
1105 		}
1106 		break;
1107 
1108 	case AF_INET:
1109 		/* copyin the address from user space */
1110 		if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1111 			break;
1112 		}
1113 
1114 		ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1115 
1116 		/*
1117 		 * now compare, and if equivalent mark entry
1118 		 * for forced expiration
1119 		 */
1120 		if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1121 			cp->rc_forced_expire = 1;
1122 		}
1123 		break;
1124 
1125 	default:
1126 		/* force this assert to fail */
1127 		ASSERT(clr->addr_type != clr->addr_type);
1128 	}
1129 }
1130 
1131 /*
1132  * This is called from nfssys() in order to clear server state
1133  * for the specified client IP Address.
1134  */
1135 int
1136 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1137 {
1138 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1139 	int rc;
1140 
1141 	/* Once nfssrv is loaded, every zone should have one of these. */
1142 	VERIFY(nsrv4 != NULL);
1143 
1144 	mutex_enter(&nsrv4->state_lock);
1145 	/*
1146 	 * But only after NFS service is running is the nfs4_server_state
1147 	 * around. It's dirty (and needs the state_lock held), but all of the
1148 	 * databases live deep in the nfs4_server_state, so it's the only thing
1149 	 * to legitimately check prior to using anything. The pointers
1150 	 * themselves may be stale.
1151 	 */
1152 	if (nsrv4->nfs4_server_state != NULL) {
1153 		VERIFY(nsrv4->rfs4_client_tab != NULL);
1154 		rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1155 		rc = 0;
1156 	} else {
1157 		rc = ENXIO;
1158 	}
1159 	mutex_exit(&nsrv4->state_lock);
1160 	return (rc);
1161 }
1162 
1163 /*
1164  * Used to initialize the NFSv4 server's state or database.  All of
1165  * the tables are created and timers are set.
1166  */
1167 void
1168 rfs4_state_g_init()
1169 {
1170 	extern boolean_t rfs4_cpr_callb(void *, int);
1171 	/*
1172 	 * Add a CPR callback so that we can update client
1173 	 * access times to extend the lease after a suspend
1174 	 * and resume (using the same class as rpcmod/connmgr)
1175 	 */
1176 	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1177 
1178 	/*
1179 	 * NFSv4 server state databases
1180 	 *
1181 	 * Initialized when the module is loaded and used by NFSv4 state
1182 	 * tables.  These kmem_cache free pools are used globally, the NFSv4
1183 	 * state tables which make use of these kmem_cache free pools are per
1184 	 * zone.
1185 	 *
1186 	 * initialize the global kmem_cache free pools which will be used by
1187 	 * the NFSv4 state tables.
1188 	 */
1189 	/* CSTYLED */
1190 	rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1191 	/* CSTYLED */
1192 	rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1193 	/* CSTYLED */
1194 	rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1195 	/* CSTYLED */
1196 	rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1197 	/* CSTYLED */
1198 	rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1199 	/* CSTYLED */
1200 	rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1201 	/* CSTYLED */
1202 	rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1203 	/* CSTYLED */
1204 	rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1205 }
1206 
1207 
1208 /*
1209  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1210  * and other state.
1211  */
1212 void
1213 rfs4_state_g_fini()
1214 {
1215 	int i;
1216 	/*
1217 	 * Cleanup the CPR callback.
1218 	 */
1219 	if (cpr_id)
1220 		(void) callb_delete(cpr_id);
1221 
1222 	/* free the NFSv4 state databases */
1223 	for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1224 		kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1225 		rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1226 	}
1227 
1228 	rfs4_client_mem_cache = NULL;
1229 	rfs4_clntIP_mem_cache = NULL;
1230 	rfs4_openown_mem_cache = NULL;
1231 	rfs4_openstID_mem_cache = NULL;
1232 	rfs4_lockstID_mem_cache = NULL;
1233 	rfs4_lockown_mem_cache = NULL;
1234 	rfs4_file_mem_cache = NULL;
1235 	rfs4_delegstID_mem_cache = NULL;
1236 
1237 	/* DSS: distributed stable storage */
1238 	nvlist_free(rfs4_dss_oldpaths);
1239 	nvlist_free(rfs4_dss_paths);
1240 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1241 }
1242 
1243 /*
1244  * Used to initialize the per zone NFSv4 server's state
1245  */
1246 void
1247 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1248 {
1249 	time_t start_time;
1250 	int start_grace;
1251 	char *dss_path = NFS4_DSS_VAR_DIR;
1252 
1253 	/* DSS: distributed stable storage: initialise served paths list */
1254 	nsrv4->dss_pathlist = NULL;
1255 
1256 	/*
1257 	 * Set the boot time.  If the server
1258 	 * has been restarted quickly and has had the opportunity to
1259 	 * service clients, then the start_time needs to be bumped
1260 	 * regardless.  A small window but it exists...
1261 	 */
1262 	start_time = gethrestime_sec();
1263 	if (nsrv4->rfs4_start_time < start_time)
1264 		nsrv4->rfs4_start_time = start_time;
1265 	else
1266 		nsrv4->rfs4_start_time++;
1267 
1268 	/*
1269 	 * Create the first server instance, or a new one if the server has
1270 	 * been restarted; see above comments on rfs4_start_time. Don't
1271 	 * start its grace period; that will be done later, to maximise the
1272 	 * clients' recovery window.
1273 	 */
1274 	start_grace = 0;
1275 	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1276 		int i;
1277 		char **dss_allpaths = NULL;
1278 		dss_allpaths = kmem_alloc(sizeof (char *) *
1279 		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1280 		/*
1281 		 * Add the default path into the list of paths for saving
1282 		 * state informantion.
1283 		 */
1284 		dss_allpaths[0] = dss_path;
1285 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
1286 			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1287 		}
1288 		rfs4_servinst_create(nsrv4, start_grace,
1289 		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
1290 		kmem_free(dss_allpaths,
1291 		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1292 	} else {
1293 		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1294 	}
1295 
1296 	/* reset the "first NFSv4 request" status */
1297 	nsrv4->seen_first_compound = 0;
1298 
1299 	mutex_enter(&nsrv4->state_lock);
1300 
1301 	/*
1302 	 * If the server state database has already been initialized,
1303 	 * skip it
1304 	 */
1305 	if (nsrv4->nfs4_server_state != NULL) {
1306 		mutex_exit(&nsrv4->state_lock);
1307 		return;
1308 	}
1309 
1310 	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1311 
1312 	/* set the various cache timers for table creation */
1313 	if (nsrv4->rfs4_client_cache_time == 0)
1314 		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1315 	if (nsrv4->rfs4_openowner_cache_time == 0)
1316 		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1317 	if (nsrv4->rfs4_state_cache_time == 0)
1318 		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1319 	if (nsrv4->rfs4_lo_state_cache_time == 0)
1320 		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1321 	if (nsrv4->rfs4_lockowner_cache_time == 0)
1322 		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1323 	if (nsrv4->rfs4_file_cache_time == 0)
1324 		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1325 	if (nsrv4->rfs4_deleg_state_cache_time == 0)
1326 		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1327 
1328 	/* Create the overall database to hold all server state */
1329 	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1330 
1331 	/* Now create the individual tables */
1332 	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1333 	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1334 	    "Client",
1335 	    nsrv4->rfs4_client_cache_time,
1336 	    2,
1337 	    rfs4_client_create,
1338 	    rfs4_client_destroy,
1339 	    rfs4_client_expiry,
1340 	    sizeof (rfs4_client_t),
1341 	    TABSIZE,
1342 	    MAXTABSZ/8, 100);
1343 	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1344 	    "nfs_client_id4", nfsclnt_hash,
1345 	    nfsclnt_compare, nfsclnt_mkkey,
1346 	    TRUE);
1347 	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1348 	    "client_id", clientid_hash,
1349 	    clientid_compare, clientid_mkkey,
1350 	    FALSE);
1351 
1352 	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
1353 	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1354 	    "ClntIP",
1355 	    nsrv4->rfs4_clntip_cache_time,
1356 	    1,
1357 	    rfs4_clntip_create,
1358 	    rfs4_clntip_destroy,
1359 	    rfs4_clntip_expiry,
1360 	    sizeof (rfs4_clntip_t),
1361 	    TABSIZE,
1362 	    MAXTABSZ, 100);
1363 	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1364 	    "client_ip", clntip_hash,
1365 	    clntip_compare, clntip_mkkey,
1366 	    TRUE);
1367 
1368 	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1369 	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1370 	    "OpenOwner",
1371 	    nsrv4->rfs4_openowner_cache_time,
1372 	    1,
1373 	    rfs4_openowner_create,
1374 	    rfs4_openowner_destroy,
1375 	    rfs4_openowner_expiry,
1376 	    sizeof (rfs4_openowner_t),
1377 	    TABSIZE,
1378 	    MAXTABSZ, 100);
1379 	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1380 	    "open_owner4", openowner_hash,
1381 	    openowner_compare,
1382 	    openowner_mkkey, TRUE);
1383 
1384 	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1385 	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1386 	    "OpenStateID",
1387 	    nsrv4->rfs4_state_cache_time,
1388 	    3,
1389 	    rfs4_state_create,
1390 	    rfs4_state_destroy,
1391 	    rfs4_state_expiry,
1392 	    sizeof (rfs4_state_t),
1393 	    TABSIZE,
1394 	    MAXTABSZ, 100);
1395 
1396 	/* CSTYLED */
1397 	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1398 	    "Openowner-File",
1399 	    state_owner_file_hash,
1400 	    state_owner_file_compare,
1401 	    state_owner_file_mkkey, TRUE);
1402 
1403 	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1404 	    "State-id", state_hash,
1405 	    state_compare, state_mkkey, FALSE);
1406 
1407 	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1408 	    "File", state_file_hash,
1409 	    state_file_compare, state_file_mkkey,
1410 	    FALSE);
1411 
1412 	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1413 	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1414 	    "LockStateID",
1415 	    nsrv4->rfs4_lo_state_cache_time,
1416 	    2,
1417 	    rfs4_lo_state_create,
1418 	    rfs4_lo_state_destroy,
1419 	    rfs4_lo_state_expiry,
1420 	    sizeof (rfs4_lo_state_t),
1421 	    TABSIZE,
1422 	    MAXTABSZ, 100);
1423 
1424 	/* CSTYLED */
1425 	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1426 	    "lockownerxstate",
1427 	    lo_state_lo_hash,
1428 	    lo_state_lo_compare,
1429 	    lo_state_lo_mkkey, TRUE);
1430 
1431 	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1432 	    "State-id",
1433 	    lo_state_hash, lo_state_compare,
1434 	    lo_state_mkkey, FALSE);
1435 
1436 	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1437 
1438 	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1439 	    "Lockowner",
1440 	    nsrv4->rfs4_lockowner_cache_time,
1441 	    2,
1442 	    rfs4_lockowner_create,
1443 	    rfs4_lockowner_destroy,
1444 	    rfs4_lockowner_expiry,
1445 	    sizeof (rfs4_lockowner_t),
1446 	    TABSIZE,
1447 	    MAXTABSZ, 100);
1448 
1449 	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1450 	    "lock_owner4", lockowner_hash,
1451 	    lockowner_compare,
1452 	    lockowner_mkkey, TRUE);
1453 
1454 	/* CSTYLED */
1455 	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1456 	    "pid", pid_hash,
1457 	    pid_compare, pid_mkkey,
1458 	    FALSE);
1459 
1460 	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1461 	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1462 	    "File",
1463 	    nsrv4->rfs4_file_cache_time,
1464 	    1,
1465 	    rfs4_file_create,
1466 	    rfs4_file_destroy,
1467 	    NULL,
1468 	    sizeof (rfs4_file_t),
1469 	    TABSIZE,
1470 	    MAXTABSZ, -1);
1471 
1472 	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1473 	    "Filehandle", file_hash,
1474 	    file_compare, file_mkkey, TRUE);
1475 
1476 	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1477 	/* CSTYLED */
1478 	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1479 	    "DelegStateID",
1480 	    nsrv4->rfs4_deleg_state_cache_time,
1481 	    2,
1482 	    rfs4_deleg_state_create,
1483 	    rfs4_deleg_state_destroy,
1484 	    rfs4_deleg_state_expiry,
1485 	    sizeof (rfs4_deleg_state_t),
1486 	    TABSIZE,
1487 	    MAXTABSZ, 100);
1488 	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1489 	    "DelegByFileClient",
1490 	    deleg_hash,
1491 	    deleg_compare,
1492 	    deleg_mkkey, TRUE);
1493 
1494 	/* CSTYLED */
1495 	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1496 	    "DelegState",
1497 	    deleg_state_hash,
1498 	    deleg_state_compare,
1499 	    deleg_state_mkkey, FALSE);
1500 
1501 	mutex_exit(&nsrv4->state_lock);
1502 
1503 	/*
1504 	 * Init the stable storage.
1505 	 */
1506 	rfs4_ss_init(nsrv4);
1507 }
1508 
1509 /*
1510  * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1511  * and state.
1512  */
1513 void
1514 rfs4_state_zone_fini()
1515 {
1516 	rfs4_database_t *dbp;
1517 	nfs4_srv_t *nsrv4;
1518 	nsrv4 = nfs4_get_srv();
1519 
1520 	rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1521 
1522 	/*
1523 	 * Clean up any dangling stable storage structures BEFORE calling
1524 	 * rfs4_servinst_destroy_all() so there are no dangling structures
1525 	 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1526 	 * freed).
1527 	 */
1528 	rfs4_ss_fini(nsrv4);
1529 
1530 	mutex_enter(&nsrv4->state_lock);
1531 
1532 	if (nsrv4->nfs4_server_state == NULL) {
1533 		mutex_exit(&nsrv4->state_lock);
1534 		return;
1535 	}
1536 
1537 	/* destroy server instances and current instance ptr */
1538 	rfs4_servinst_destroy_all(nsrv4);
1539 
1540 	/* reset the "first NFSv4 request" status */
1541 	nsrv4->seen_first_compound = 0;
1542 
1543 	dbp = nsrv4->nfs4_server_state;
1544 	nsrv4->nfs4_server_state = NULL;
1545 
1546 	rw_destroy(&nsrv4->rfs4_findclient_lock);
1547 
1548 	/* First stop all of the reaper threads in the database */
1549 	rfs4_database_shutdown(dbp);
1550 
1551 	/*
1552 	 * WARNING: There may be consumers of the rfs4 database still
1553 	 * active as we destroy these.  IF that's the case, consider putting
1554 	 * some of their _zone_fini()-like functions into the zsd key as
1555 	 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
1556 	 * maintain some ordering guarantees better that way.
1557 	 */
1558 	/* Now destroy/release the database tables */
1559 	rfs4_database_destroy(dbp);
1560 
1561 	/* Reset the cache timers for next time */
1562 	nsrv4->rfs4_client_cache_time = 0;
1563 	nsrv4->rfs4_openowner_cache_time = 0;
1564 	nsrv4->rfs4_state_cache_time = 0;
1565 	nsrv4->rfs4_lo_state_cache_time = 0;
1566 	nsrv4->rfs4_lockowner_cache_time = 0;
1567 	nsrv4->rfs4_file_cache_time = 0;
1568 	nsrv4->rfs4_deleg_state_cache_time = 0;
1569 
1570 	mutex_exit(&nsrv4->state_lock);
1571 }
1572 
/*
 * Overlay used to build and pick apart a clientid4.  The same storage can
 * be viewed either as the opaque clientid4 (id4) or as the two 32-bit
 * fields this file stores into it (impl_id).
 */
typedef union {
	struct {
		/* set from nsrv4->rfs4_start_time in rfs4_client_create() */
		uint32_t start_time;
		/* set from rfs4_dbe_getid(); also the clientid hash value */
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1580 
1581 static int foreign_stateid(stateid_t *id);
1582 static int foreign_clientid(cid *cidp);
1583 static void embed_nodeid(cid *cidp);
1584 
/*
 * Overlay used to build the SETCLIENTID_CONFIRM verifier.  The same
 * storage can be viewed either as the opaque verifier4 (confirm_verf) or
 * as the two 32-bit fields this file stores into it (cv_impl).
 */
typedef union {
	struct {
		/* copied from the clientid's c_id in rfs4_client_create() */
		uint32_t c_id;
		/* starts at 0; incremented by rfs4_client_scv_next() */
		uint32_t gen_num;
	} cv_impl;
	verifier4	confirm_verf;
} scid_confirm_verf;
1592 
1593 static uint32_t
1594 clientid_hash(void *key)
1595 {
1596 	cid *idp = key;
1597 
1598 	return (idp->impl_id.c_id);
1599 }
1600 
1601 static bool_t
1602 clientid_compare(rfs4_entry_t entry, void *key)
1603 {
1604 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1605 	clientid4 *idp = key;
1606 
1607 	return (*idp == cp->rc_clientid);
1608 }
1609 
1610 static void *
1611 clientid_mkkey(rfs4_entry_t entry)
1612 {
1613 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1614 
1615 	return (&cp->rc_clientid);
1616 }
1617 
1618 static uint32_t
1619 nfsclnt_hash(void *key)
1620 {
1621 	nfs_client_id4 *client = key;
1622 	int i;
1623 	uint32_t hash = 0;
1624 
1625 	for (i = 0; i < client->id_len; i++) {
1626 		hash <<= 1;
1627 		hash += (uint_t)client->id_val[i];
1628 	}
1629 	return (hash);
1630 }
1631 
1632 
1633 static bool_t
1634 nfsclnt_compare(rfs4_entry_t entry, void *key)
1635 {
1636 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1637 	nfs_client_id4 *nfs_client = key;
1638 
1639 	if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1640 		return (FALSE);
1641 
1642 	return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1643 	    nfs_client->id_len) == 0);
1644 }
1645 
1646 static void *
1647 nfsclnt_mkkey(rfs4_entry_t entry)
1648 {
1649 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1650 
1651 	return (&cp->rc_nfs_client);
1652 }
1653 
1654 static bool_t
1655 rfs4_client_expiry(rfs4_entry_t u_entry)
1656 {
1657 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1658 	bool_t cp_expired;
1659 
1660 	if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1661 		cp->rc_ss_remove = 1;
1662 		return (TRUE);
1663 	}
1664 	/*
1665 	 * If the sysadmin has used clear_locks for this
1666 	 * entry then forced_expire will be set and we
1667 	 * want this entry to be reaped. Or the entry
1668 	 * has exceeded its lease period.
1669 	 */
1670 	cp_expired = (cp->rc_forced_expire ||
1671 	    (gethrestime_sec() - cp->rc_last_access
1672 	    > rfs4_lease_time));
1673 
1674 	if (!cp->rc_ss_remove && cp_expired)
1675 		cp->rc_ss_remove = 1;
1676 	return (cp_expired);
1677 }
1678 
1679 /*
1680  * Remove the leaf file from all distributed stable storage paths.
1681  */
1682 static void
1683 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1684 {
1685 	nfs4_srv_t *nsrv4;
1686 	rfs4_servinst_t *sip;
1687 	char *leaf = cp->rc_ss_pn->leaf;
1688 
1689 	/*
1690 	 * since the state files are written to all DSS
1691 	 * paths we must remove this leaf file instance
1692 	 * from all server instances.
1693 	 */
1694 
1695 	nsrv4 = nfs4_get_srv();
1696 	mutex_enter(&nsrv4->servinst_lock);
1697 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1698 		/* remove the leaf file associated with this server instance */
1699 		rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1700 	}
1701 	mutex_exit(&nsrv4->servinst_lock);
1702 }
1703 
1704 static void
1705 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1706 {
1707 	int i, npaths = sip->dss_npaths;
1708 
1709 	for (i = 0; i < npaths; i++) {
1710 		rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1711 		char *path, *dir;
1712 		size_t pathlen;
1713 
1714 		/* the HA-NFSv4 path might have been failed-over away from us */
1715 		if (dss_path == NULL)
1716 			continue;
1717 
1718 		dir = dss_path->path;
1719 
1720 		/* allow 3 extra bytes for two '/' & a NUL */
1721 		pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1722 		path = kmem_alloc(pathlen, KM_SLEEP);
1723 		(void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1724 
1725 		(void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1726 
1727 		kmem_free(path, pathlen);
1728 	}
1729 }
1730 
1731 static void
1732 rfs4_client_destroy(rfs4_entry_t u_entry)
1733 {
1734 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1735 
1736 	mutex_destroy(cp->rc_cbinfo.cb_lock);
1737 	cv_destroy(cp->rc_cbinfo.cb_cv);
1738 	cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1739 	list_destroy(&cp->rc_openownerlist);
1740 
1741 	/* free callback info */
1742 	rfs4_cbinfo_free(&cp->rc_cbinfo);
1743 
1744 	if (cp->rc_cp_confirmed)
1745 		rfs4_client_rele(cp->rc_cp_confirmed);
1746 
1747 	if (cp->rc_ss_pn) {
1748 		/* check if the stable storage files need to be removed */
1749 		if (cp->rc_ss_remove)
1750 			rfs4_dss_remove_cpleaf(cp);
1751 		rfs4_ss_pnfree(cp->rc_ss_pn);
1752 	}
1753 
1754 	/* Free the client supplied client id */
1755 	kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1756 
1757 	if (cp->rc_sysidt != LM_NOSYSID)
1758 		lm_free_sysidt(cp->rc_sysidt);
1759 }
1760 
1761 static bool_t
1762 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1763 {
1764 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1765 	nfs_client_id4 *client = (nfs_client_id4 *)arg;
1766 	struct sockaddr *ca;
1767 	cid *cidp;
1768 	scid_confirm_verf *scvp;
1769 	nfs4_srv_t *nsrv4;
1770 
1771 	nsrv4 = nfs4_get_srv();
1772 
1773 	/* Get a clientid to give to the client */
1774 	cidp = (cid *)&cp->rc_clientid;
1775 	cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1776 	cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1777 
1778 	/* If we are booted as a cluster node, embed our nodeid */
1779 	if (cluster_bootflags & CLUSTER_BOOTED)
1780 		embed_nodeid(cidp);
1781 
1782 	/* Allocate and copy client's client id value */
1783 	cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1784 	cp->rc_nfs_client.id_len = client->id_len;
1785 	bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1786 	cp->rc_nfs_client.verifier = client->verifier;
1787 
1788 	/* Copy client's IP address */
1789 	ca = client->cl_addr;
1790 	if (ca->sa_family == AF_INET)
1791 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1792 	else if (ca->sa_family == AF_INET6)
1793 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1794 	cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1795 
1796 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1797 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1798 	scvp->cv_impl.c_id = cidp->impl_id.c_id;
1799 	scvp->cv_impl.gen_num = 0;
1800 
1801 	/* An F_UNLKSYS has been done for this client */
1802 	cp->rc_unlksys_completed = FALSE;
1803 
1804 	/* We need the client to ack us */
1805 	cp->rc_need_confirm = TRUE;
1806 	cp->rc_cp_confirmed = NULL;
1807 
1808 	/* TRUE all the time until the callback path actually fails */
1809 	cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1810 
1811 	/* Initialize the access time to now */
1812 	cp->rc_last_access = gethrestime_sec();
1813 
1814 	cp->rc_cr_set = NULL;
1815 
1816 	cp->rc_sysidt = LM_NOSYSID;
1817 
1818 	list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1819 	    offsetof(rfs4_openowner_t, ro_node));
1820 
1821 	/* set up the callback control structure */
1822 	cp->rc_cbinfo.cb_state = CB_UNINIT;
1823 	mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1824 	cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1825 	cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1826 
1827 	/*
1828 	 * Associate the client_t with the current server instance.
1829 	 * The hold is solely to satisfy the calling requirement of
1830 	 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1831 	 */
1832 	rfs4_dbe_hold(cp->rc_dbe);
1833 	rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1834 	rfs4_dbe_rele(cp->rc_dbe);
1835 
1836 	return (TRUE);
1837 }
1838 
1839 /*
1840  * Caller wants to generate/update the setclientid_confirm verifier
1841  * associated with a client.  This is done during the SETCLIENTID
1842  * processing.
1843  */
1844 void
1845 rfs4_client_scv_next(rfs4_client_t *cp)
1846 {
1847 	scid_confirm_verf *scvp;
1848 
1849 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1850 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1851 	scvp->cv_impl.gen_num++;
1852 }
1853 
1854 void
1855 rfs4_client_rele(rfs4_client_t *cp)
1856 {
1857 	rfs4_dbe_rele(cp->rc_dbe);
1858 }
1859 
1860 rfs4_client_t *
1861 rfs4_findclient(nfs_client_id4 *client, bool_t *create,	rfs4_client_t *oldcp)
1862 {
1863 	rfs4_client_t *cp;
1864 	nfs4_srv_t *nsrv4;
1865 	nsrv4 = nfs4_get_srv();
1866 
1867 
1868 	if (oldcp) {
1869 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1870 		rfs4_dbe_hide(oldcp->rc_dbe);
1871 	} else {
1872 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1873 	}
1874 
1875 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1876 	    create, (void *)client, RFS4_DBS_VALID);
1877 
1878 	if (oldcp)
1879 		rfs4_dbe_unhide(oldcp->rc_dbe);
1880 
1881 	rw_exit(&nsrv4->rfs4_findclient_lock);
1882 
1883 	return (cp);
1884 }
1885 
1886 rfs4_client_t *
1887 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1888 {
1889 	rfs4_client_t *cp;
1890 	bool_t create = FALSE;
1891 	cid *cidp = (cid *)&clientid;
1892 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1893 
1894 	/* If we're a cluster and the nodeid isn't right, short-circuit */
1895 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1896 		return (NULL);
1897 
1898 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1899 
1900 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1901 	    &create, NULL, RFS4_DBS_VALID);
1902 
1903 	rw_exit(&nsrv4->rfs4_findclient_lock);
1904 
1905 	if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1906 		rfs4_client_rele(cp);
1907 		return (NULL);
1908 	} else {
1909 		return (cp);
1910 	}
1911 }
1912 
1913 static uint32_t
1914 clntip_hash(void *key)
1915 {
1916 	struct sockaddr *addr = key;
1917 	int i, len = 0;
1918 	uint32_t hash = 0;
1919 	char *ptr;
1920 
1921 	if (addr->sa_family == AF_INET) {
1922 		struct sockaddr_in *a = (struct sockaddr_in *)addr;
1923 		len = sizeof (struct in_addr);
1924 		ptr = (char *)&a->sin_addr;
1925 	} else if (addr->sa_family == AF_INET6) {
1926 		struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
1927 		len = sizeof (struct in6_addr);
1928 		ptr = (char *)&a->sin6_addr;
1929 	} else
1930 		return (0);
1931 
1932 	for (i = 0; i < len; i++) {
1933 		hash <<= 1;
1934 		hash += (uint_t)ptr[i];
1935 	}
1936 	return (hash);
1937 }
1938 
1939 static bool_t
1940 clntip_compare(rfs4_entry_t entry, void *key)
1941 {
1942 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1943 	struct sockaddr *addr = key;
1944 	int len = 0;
1945 	char *p1, *p2;
1946 
1947 	if (addr->sa_family == AF_INET) {
1948 		struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1949 		struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1950 		len = sizeof (struct in_addr);
1951 		p1 = (char *)&a1->sin_addr;
1952 		p2 = (char *)&a2->sin_addr;
1953 	} else if (addr->sa_family == AF_INET6) {
1954 		struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1955 		struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1956 		len = sizeof (struct in6_addr);
1957 		p1 = (char *)&a1->sin6_addr;
1958 		p2 = (char *)&a2->sin6_addr;
1959 	} else
1960 		return (0);
1961 
1962 	return (bcmp(p1, p2, len) == 0);
1963 }
1964 
1965 static void *
1966 clntip_mkkey(rfs4_entry_t entry)
1967 {
1968 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1969 
1970 	return (&cp->ri_addr);
1971 }
1972 
1973 static bool_t
1974 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1975 {
1976 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1977 
1978 	if (rfs4_dbe_is_invalid(cp->ri_dbe))
1979 		return (TRUE);
1980 	return (FALSE);
1981 }
1982 
/*
 * Destroy routine for the ClntIP table.  Intentionally empty:
 * rfs4_clntip_create() only copies an address and sets a flag in the
 * entry, so there is nothing to release here.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1988 
1989 static bool_t
1990 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1991 {
1992 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1993 	struct sockaddr *ca = (struct sockaddr *)arg;
1994 
1995 	/* Copy client's IP address */
1996 	if (ca->sa_family == AF_INET)
1997 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1998 	else if (ca->sa_family == AF_INET6)
1999 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
2000 	else
2001 		return (FALSE);
2002 	cp->ri_no_referrals = 1;
2003 
2004 	return (TRUE);
2005 }
2006 
2007 rfs4_clntip_t *
2008 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
2009 {
2010 	rfs4_clntip_t *cp;
2011 	nfs4_srv_t *nsrv4;
2012 
2013 	nsrv4 = nfs4_get_srv();
2014 
2015 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2016 
2017 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2018 	    create, addr, RFS4_DBS_VALID);
2019 
2020 	rw_exit(&nsrv4->rfs4_findclient_lock);
2021 
2022 	return (cp);
2023 }
2024 
2025 void
2026 rfs4_invalidate_clntip(struct sockaddr *addr)
2027 {
2028 	rfs4_clntip_t *cp;
2029 	bool_t create = FALSE;
2030 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2031 
2032 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2033 
2034 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2035 	    &create, NULL, RFS4_DBS_VALID);
2036 	if (cp == NULL) {
2037 		rw_exit(&nsrv4->rfs4_findclient_lock);
2038 		return;
2039 	}
2040 	rfs4_dbe_invalidate(cp->ri_dbe);
2041 	rfs4_dbe_rele(cp->ri_dbe);
2042 
2043 	rw_exit(&nsrv4->rfs4_findclient_lock);
2044 }
2045 
2046 bool_t
2047 rfs4_lease_expired(rfs4_client_t *cp)
2048 {
2049 	bool_t rc;
2050 
2051 	rfs4_dbe_lock(cp->rc_dbe);
2052 
2053 	/*
2054 	 * If the admin has executed clear_locks for this
2055 	 * client id, force expire will be set, so no need
2056 	 * to calculate anything because it's "outa here".
2057 	 */
2058 	if (cp->rc_forced_expire) {
2059 		rc = TRUE;
2060 	} else {
2061 		rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2062 	}
2063 
2064 	/*
2065 	 * If the lease has expired we will also want
2066 	 * to remove any stable storage state data. So
2067 	 * mark the client id accordingly.
2068 	 */
2069 	if (!cp->rc_ss_remove)
2070 		cp->rc_ss_remove = (rc == TRUE);
2071 
2072 	rfs4_dbe_unlock(cp->rc_dbe);
2073 
2074 	return (rc);
2075 }
2076 
2077 void
2078 rfs4_update_lease(rfs4_client_t *cp)
2079 {
2080 	rfs4_dbe_lock(cp->rc_dbe);
2081 	if (!cp->rc_forced_expire)
2082 		cp->rc_last_access = gethrestime_sec();
2083 	rfs4_dbe_unlock(cp->rc_dbe);
2084 }
2085 
2086 
2087 static bool_t
2088 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2089 {
2090 	bool_t rc;
2091 
2092 	if (a->clientid != b->clientid)
2093 		return (FALSE);
2094 
2095 	if (a->owner_len != b->owner_len)
2096 		return (FALSE);
2097 
2098 	rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2099 
2100 	return (rc);
2101 }
2102 
2103 static uint_t
2104 openowner_hash(void *key)
2105 {
2106 	int i;
2107 	open_owner4 *openowner = key;
2108 	uint_t hash = 0;
2109 
2110 	for (i = 0; i < openowner->owner_len; i++) {
2111 		hash <<= 4;
2112 		hash += (uint_t)openowner->owner_val[i];
2113 	}
2114 	hash += (uint_t)openowner->clientid;
2115 	hash |= (openowner->clientid >> 32);
2116 
2117 	return (hash);
2118 }
2119 
2120 static bool_t
2121 openowner_compare(rfs4_entry_t u_entry, void *key)
2122 {
2123 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2124 	open_owner4 *arg = key;
2125 
2126 	return (EQOPENOWNER(&oo->ro_owner, arg));
2127 }
2128 
2129 void *
2130 openowner_mkkey(rfs4_entry_t u_entry)
2131 {
2132 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2133 
2134 	return (&oo->ro_owner);
2135 }
2136 
2137 /* ARGSUSED */
2138 static bool_t
2139 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2140 {
2141 	/* openstateid held us and did all needed delay */
2142 	return (TRUE);
2143 }
2144 
2145 static void
2146 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2147 {
2148 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2149 
2150 	/* Remove open owner from client's lists of open owners */
2151 	rfs4_dbe_lock(oo->ro_client->rc_dbe);
2152 	list_remove(&oo->ro_client->rc_openownerlist, oo);
2153 	rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2154 
2155 	/* One less reference to the client */
2156 	rfs4_client_rele(oo->ro_client);
2157 	oo->ro_client = NULL;
2158 
2159 	/* Free the last reply for this lock owner */
2160 	rfs4_free_reply(&oo->ro_reply);
2161 
2162 	if (oo->ro_reply_fh.nfs_fh4_val) {
2163 		kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2164 		    oo->ro_reply_fh.nfs_fh4_len);
2165 		oo->ro_reply_fh.nfs_fh4_val = NULL;
2166 		oo->ro_reply_fh.nfs_fh4_len = 0;
2167 	}
2168 
2169 	rfs4_sw_destroy(&oo->ro_sw);
2170 	list_destroy(&oo->ro_statelist);
2171 
2172 	/* Free the lock owner id */
2173 	kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2174 }
2175 
2176 void
2177 rfs4_openowner_rele(rfs4_openowner_t *oo)
2178 {
2179 	rfs4_dbe_rele(oo->ro_dbe);
2180 }
2181 
2182 static bool_t
2183 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2184 {
2185 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2186 	rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2187 	open_owner4 *openowner = &argp->ro_owner;
2188 	seqid4 seqid = argp->ro_open_seqid;
2189 	rfs4_client_t *cp;
2190 	bool_t create = FALSE;
2191 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2192 
2193 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2194 
2195 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2196 	    &openowner->clientid,
2197 	    &create, NULL, RFS4_DBS_VALID);
2198 
2199 	rw_exit(&nsrv4->rfs4_findclient_lock);
2200 
2201 	if (cp == NULL)
2202 		return (FALSE);
2203 
2204 	oo->ro_reply_fh.nfs_fh4_len = 0;
2205 	oo->ro_reply_fh.nfs_fh4_val = NULL;
2206 
2207 	oo->ro_owner.clientid = openowner->clientid;
2208 	oo->ro_owner.owner_val =
2209 	    kmem_alloc(openowner->owner_len, KM_SLEEP);
2210 
2211 	bcopy(openowner->owner_val,
2212 	    oo->ro_owner.owner_val, openowner->owner_len);
2213 
2214 	oo->ro_owner.owner_len = openowner->owner_len;
2215 
2216 	oo->ro_need_confirm = TRUE;
2217 
2218 	rfs4_sw_init(&oo->ro_sw);
2219 
2220 	oo->ro_open_seqid = seqid;
2221 	bzero(&oo->ro_reply, sizeof (nfs_resop4));
2222 	oo->ro_client = cp;
2223 	oo->ro_cr_set = NULL;
2224 
2225 	list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2226 	    offsetof(rfs4_state_t, rs_node));
2227 
2228 	/* Insert openowner into client's open owner list */
2229 	rfs4_dbe_lock(cp->rc_dbe);
2230 	list_insert_tail(&cp->rc_openownerlist, oo);
2231 	rfs4_dbe_unlock(cp->rc_dbe);
2232 
2233 	return (TRUE);
2234 }
2235 
2236 rfs4_openowner_t *
2237 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2238 {
2239 	rfs4_openowner_t *oo;
2240 	rfs4_openowner_t arg;
2241 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2242 
2243 	arg.ro_owner = *openowner;
2244 	arg.ro_open_seqid = seqid;
2245 	/* CSTYLED */
2246 	oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2247 	    create, &arg, RFS4_DBS_VALID);
2248 
2249 	return (oo);
2250 }
2251 
2252 void
2253 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2254 {
2255 
2256 	rfs4_dbe_lock(oo->ro_dbe);
2257 
2258 	oo->ro_open_seqid++;
2259 
2260 	rfs4_dbe_unlock(oo->ro_dbe);
2261 }
2262 
2263 void
2264 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2265 {
2266 
2267 	rfs4_dbe_lock(oo->ro_dbe);
2268 
2269 	rfs4_free_reply(&oo->ro_reply);
2270 
2271 	rfs4_copy_reply(&oo->ro_reply, resp);
2272 
2273 	/* Save the filehandle if provided and free if not used */
2274 	if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2275 	    fh && fh->nfs_fh4_len) {
2276 		if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2277 			oo->ro_reply_fh.nfs_fh4_val =
2278 			    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2279 		nfs_fh4_copy(fh, &oo->ro_reply_fh);
2280 	} else {
2281 		if (oo->ro_reply_fh.nfs_fh4_val) {
2282 			kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2283 			    oo->ro_reply_fh.nfs_fh4_len);
2284 			oo->ro_reply_fh.nfs_fh4_val = NULL;
2285 			oo->ro_reply_fh.nfs_fh4_len = 0;
2286 		}
2287 	}
2288 
2289 	rfs4_dbe_unlock(oo->ro_dbe);
2290 }
2291 
2292 static bool_t
2293 lockowner_compare(rfs4_entry_t u_entry, void *key)
2294 {
2295 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2296 	lock_owner4 *b = (lock_owner4 *)key;
2297 
2298 	if (lo->rl_owner.clientid != b->clientid)
2299 		return (FALSE);
2300 
2301 	if (lo->rl_owner.owner_len != b->owner_len)
2302 		return (FALSE);
2303 
2304 	return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2305 	    lo->rl_owner.owner_len) == 0);
2306 }
2307 
2308 void *
2309 lockowner_mkkey(rfs4_entry_t u_entry)
2310 {
2311 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2312 
2313 	return (&lo->rl_owner);
2314 }
2315 
2316 static uint32_t
2317 lockowner_hash(void *key)
2318 {
2319 	int i;
2320 	lock_owner4 *lockowner = key;
2321 	uint_t hash = 0;
2322 
2323 	for (i = 0; i < lockowner->owner_len; i++) {
2324 		hash <<= 4;
2325 		hash += (uint_t)lockowner->owner_val[i];
2326 	}
2327 	hash += (uint_t)lockowner->clientid;
2328 	hash |= (lockowner->clientid >> 32);
2329 
2330 	return (hash);
2331 }
2332 
2333 static uint32_t
2334 pid_hash(void *key)
2335 {
2336 	return ((uint32_t)(uintptr_t)key);
2337 }
2338 
2339 static void *
2340 pid_mkkey(rfs4_entry_t u_entry)
2341 {
2342 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2343 
2344 	return ((void *)(uintptr_t)lo->rl_pid);
2345 }
2346 
2347 static bool_t
2348 pid_compare(rfs4_entry_t u_entry, void *key)
2349 {
2350 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2351 
2352 	return (lo->rl_pid == (pid_t)(uintptr_t)key);
2353 }
2354 
2355 static void
2356 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2357 {
2358 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2359 
2360 	/* Free the lock owner id */
2361 	kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2362 	rfs4_client_rele(lo->rl_client);
2363 }
2364 
2365 void
2366 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2367 {
2368 	rfs4_dbe_rele(lo->rl_dbe);
2369 }
2370 
2371 /* ARGSUSED */
2372 static bool_t
2373 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2374 {
2375 	/*
2376 	 * Since expiry is called with no other references on
2377 	 * this struct, go ahead and have it removed.
2378 	 */
2379 	return (TRUE);
2380 }
2381 
2382 static bool_t
2383 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2384 {
2385 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2386 	lock_owner4 *lockowner = (lock_owner4 *)arg;
2387 	rfs4_client_t *cp;
2388 	bool_t create = FALSE;
2389 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2390 
2391 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2392 
2393 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2394 	    &lockowner->clientid,
2395 	    &create, NULL, RFS4_DBS_VALID);
2396 
2397 	rw_exit(&nsrv4->rfs4_findclient_lock);
2398 
2399 	if (cp == NULL)
2400 		return (FALSE);
2401 
2402 	/* Reference client */
2403 	lo->rl_client = cp;
2404 	lo->rl_owner.clientid = lockowner->clientid;
2405 	lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2406 	bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2407 	    lockowner->owner_len);
2408 	lo->rl_owner.owner_len = lockowner->owner_len;
2409 	lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2410 
2411 	return (TRUE);
2412 }
2413 
2414 rfs4_lockowner_t *
2415 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2416 {
2417 	rfs4_lockowner_t *lo;
2418 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2419 
2420 	/* CSTYLED */
2421 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2422 	    create, lockowner, RFS4_DBS_VALID);
2423 
2424 	return (lo);
2425 }
2426 
2427 rfs4_lockowner_t *
2428 rfs4_findlockowner_by_pid(pid_t pid)
2429 {
2430 	rfs4_lockowner_t *lo;
2431 	bool_t create = FALSE;
2432 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2433 
2434 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2435 	    (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2436 
2437 	return (lo);
2438 }
2439 
2440 
2441 static uint32_t
2442 file_hash(void *key)
2443 {
2444 	return (ADDRHASH(key));
2445 }
2446 
2447 static void *
2448 file_mkkey(rfs4_entry_t u_entry)
2449 {
2450 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2451 
2452 	return (fp->rf_vp);
2453 }
2454 
2455 static bool_t
2456 file_compare(rfs4_entry_t u_entry, void *key)
2457 {
2458 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2459 
2460 	return (fp->rf_vp == (vnode_t *)key);
2461 }
2462 
2463 static void
2464 rfs4_file_destroy(rfs4_entry_t u_entry)
2465 {
2466 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2467 
2468 	list_destroy(&fp->rf_delegstatelist);
2469 
2470 	if (fp->rf_filehandle.nfs_fh4_val)
2471 		kmem_free(fp->rf_filehandle.nfs_fh4_val,
2472 		    fp->rf_filehandle.nfs_fh4_len);
2473 	cv_destroy(fp->rf_dinfo.rd_recall_cv);
2474 	if (fp->rf_vp) {
2475 		vnode_t *vp = fp->rf_vp;
2476 
2477 		mutex_enter(&vp->v_vsd_lock);
2478 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
2479 		mutex_exit(&vp->v_vsd_lock);
2480 		VN_RELE(vp);
2481 		fp->rf_vp = NULL;
2482 	}
2483 	rw_destroy(&fp->rf_file_rwlock);
2484 }
2485 
2486 /*
2487  * Used to unlock the underlying dbe struct only
2488  */
2489 void
2490 rfs4_file_rele(rfs4_file_t *fp)
2491 {
2492 	rfs4_dbe_rele(fp->rf_dbe);
2493 }
2494 
2495 typedef struct {
2496     vnode_t *vp;
2497     nfs_fh4 *fh;
2498 } rfs4_fcreate_arg;
2499 
2500 static bool_t
2501 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2502 {
2503 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2504 	rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2505 	vnode_t *vp = ap->vp;
2506 	nfs_fh4 *fh = ap->fh;
2507 
2508 	VN_HOLD(vp);
2509 
2510 	fp->rf_filehandle.nfs_fh4_len = 0;
2511 	fp->rf_filehandle.nfs_fh4_val = NULL;
2512 	ASSERT(fh && fh->nfs_fh4_len);
2513 	if (fh && fh->nfs_fh4_len) {
2514 		fp->rf_filehandle.nfs_fh4_val =
2515 		    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2516 		nfs_fh4_copy(fh, &fp->rf_filehandle);
2517 	}
2518 	fp->rf_vp = vp;
2519 
2520 	list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2521 	    offsetof(rfs4_deleg_state_t, rds_node));
2522 
2523 	fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2524 	fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2525 
2526 	mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2527 	cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2528 
2529 	fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2530 
2531 	rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2532 
2533 	mutex_enter(&vp->v_vsd_lock);
2534 	VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2535 	mutex_exit(&vp->v_vsd_lock);
2536 
2537 	return (TRUE);
2538 }
2539 
2540 rfs4_file_t *
2541 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2542 {
2543 	rfs4_file_t *fp;
2544 	rfs4_fcreate_arg arg;
2545 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2546 
2547 	arg.vp = vp;
2548 	arg.fh = fh;
2549 
2550 	if (*create == TRUE)
2551 		/* CSTYLED */
2552 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2553 		    &arg, RFS4_DBS_VALID);
2554 	else {
2555 		mutex_enter(&vp->v_vsd_lock);
2556 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2557 		if (fp) {
2558 			rfs4_dbe_lock(fp->rf_dbe);
2559 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2560 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2561 				rfs4_dbe_unlock(fp->rf_dbe);
2562 				fp = NULL;
2563 			} else {
2564 				rfs4_dbe_hold(fp->rf_dbe);
2565 				rfs4_dbe_unlock(fp->rf_dbe);
2566 			}
2567 		}
2568 		mutex_exit(&vp->v_vsd_lock);
2569 	}
2570 	return (fp);
2571 }
2572 
2573 /*
2574  * Find a file in the db and once it is located, take the rw lock.
2575  * Need to check the vnode pointer and if it does not exist (it was
2576  * removed between the db location and check) redo the find.  This
2577  * assumes that a file struct that has a NULL vnode pointer is marked
2578  * at 'invalid' and will not be found in the db the second time
2579  * around.
2580  */
2581 rfs4_file_t *
2582 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2583 {
2584 	rfs4_file_t *fp;
2585 	rfs4_fcreate_arg arg;
2586 	bool_t screate = *create;
2587 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2588 
2589 	if (screate == FALSE) {
2590 		mutex_enter(&vp->v_vsd_lock);
2591 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2592 		if (fp) {
2593 			rfs4_dbe_lock(fp->rf_dbe);
2594 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2595 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2596 				rfs4_dbe_unlock(fp->rf_dbe);
2597 				mutex_exit(&vp->v_vsd_lock);
2598 				fp = NULL;
2599 			} else {
2600 				rfs4_dbe_hold(fp->rf_dbe);
2601 				rfs4_dbe_unlock(fp->rf_dbe);
2602 				mutex_exit(&vp->v_vsd_lock);
2603 				rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2604 				if (fp->rf_vp == NULL) {
2605 					rw_exit(&fp->rf_file_rwlock);
2606 					rfs4_file_rele(fp);
2607 					fp = NULL;
2608 				}
2609 			}
2610 		} else {
2611 			mutex_exit(&vp->v_vsd_lock);
2612 		}
2613 	} else {
2614 retry:
2615 		arg.vp = vp;
2616 		arg.fh = fh;
2617 
2618 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2619 		    create, &arg, RFS4_DBS_VALID);
2620 		if (fp != NULL) {
2621 			rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2622 			if (fp->rf_vp == NULL) {
2623 				rw_exit(&fp->rf_file_rwlock);
2624 				rfs4_file_rele(fp);
2625 				*create = screate;
2626 				goto retry;
2627 			}
2628 		}
2629 	}
2630 
2631 	return (fp);
2632 }
2633 
2634 static uint32_t
2635 lo_state_hash(void *key)
2636 {
2637 	stateid_t *id = key;
2638 
2639 	return (id->bits.ident+id->bits.pid);
2640 }
2641 
2642 static bool_t
2643 lo_state_compare(rfs4_entry_t u_entry, void *key)
2644 {
2645 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2646 	stateid_t *id = key;
2647 	bool_t rc;
2648 
2649 	rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2650 	    lsp->rls_lockid.bits.type == id->bits.type &&
2651 	    lsp->rls_lockid.bits.ident == id->bits.ident &&
2652 	    lsp->rls_lockid.bits.pid == id->bits.pid);
2653 
2654 	return (rc);
2655 }
2656 
2657 static void *
2658 lo_state_mkkey(rfs4_entry_t u_entry)
2659 {
2660 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2661 
2662 	return (&lsp->rls_lockid);
2663 }
2664 
2665 static bool_t
2666 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2667 {
2668 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2669 
2670 	if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2671 		return (TRUE);
2672 	if (lsp->rls_state->rs_closed)
2673 		return (TRUE);
2674 	return ((gethrestime_sec() -
2675 	    lsp->rls_state->rs_owner->ro_client->rc_last_access
2676 	    > rfs4_lease_time));
2677 }
2678 
2679 static void
2680 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2681 {
2682 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2683 
2684 	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2685 	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2686 	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2687 
2688 	rfs4_sw_destroy(&lsp->rls_sw);
2689 
2690 	/* Make sure to release the file locks */
2691 	if (lsp->rls_locks_cleaned == FALSE) {
2692 		lsp->rls_locks_cleaned = TRUE;
2693 		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2694 			/* Is the PxFS kernel module loaded? */
2695 			if (lm_remove_file_locks != NULL) {
2696 				int new_sysid;
2697 
2698 				/* Encode the cluster nodeid in new sysid */
2699 				new_sysid =
2700 				    lsp->rls_locker->rl_client->rc_sysidt;
2701 				lm_set_nlmid_flk(&new_sysid);
2702 
2703 				/*
2704 				 * This PxFS routine removes file locks for a
2705 				 * client over all nodes of a cluster.
2706 				 */
2707 				DTRACE_PROBE1(nfss_i_clust_rm_lck,
2708 				    int, new_sysid);
2709 				(*lm_remove_file_locks)(new_sysid);
2710 			} else {
2711 				(void) cleanlocks(
2712 				    lsp->rls_state->rs_finfo->rf_vp,
2713 				    lsp->rls_locker->rl_pid,
2714 				    lsp->rls_locker->rl_client->rc_sysidt);
2715 			}
2716 		}
2717 	}
2718 
2719 	/* Free the last reply for this state */
2720 	rfs4_free_reply(&lsp->rls_reply);
2721 
2722 	rfs4_lockowner_rele(lsp->rls_locker);
2723 	lsp->rls_locker = NULL;
2724 
2725 	rfs4_state_rele_nounlock(lsp->rls_state);
2726 	lsp->rls_state = NULL;
2727 }
2728 
2729 static bool_t
2730 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2731 {
2732 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2733 	rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2734 	rfs4_lockowner_t *lo = argp->rls_locker;
2735 	rfs4_state_t *sp = argp->rls_state;
2736 
2737 	lsp->rls_state = sp;
2738 
2739 	lsp->rls_lockid = sp->rs_stateid;
2740 	lsp->rls_lockid.bits.type = LOCKID;
2741 	lsp->rls_lockid.bits.chgseq = 0;
2742 	lsp->rls_lockid.bits.pid = lo->rl_pid;
2743 
2744 	lsp->rls_locks_cleaned = FALSE;
2745 	lsp->rls_lock_completed = FALSE;
2746 
2747 	rfs4_sw_init(&lsp->rls_sw);
2748 
2749 	/* Attached the supplied lock owner */
2750 	rfs4_dbe_hold(lo->rl_dbe);
2751 	lsp->rls_locker = lo;
2752 
2753 	rfs4_dbe_lock(sp->rs_dbe);
2754 	list_insert_tail(&sp->rs_lostatelist, lsp);
2755 	rfs4_dbe_hold(sp->rs_dbe);
2756 	rfs4_dbe_unlock(sp->rs_dbe);
2757 
2758 	return (TRUE);
2759 }
2760 
2761 void
2762 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2763 {
2764 	if (unlock_fp == TRUE)
2765 		rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2766 	rfs4_dbe_rele(lsp->rls_dbe);
2767 }
2768 
2769 static rfs4_lo_state_t *
2770 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2771 {
2772 	rfs4_lo_state_t *lsp;
2773 	bool_t create = FALSE;
2774 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2775 
2776 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2777 	    &create, NULL, RFS4_DBS_VALID);
2778 	if (lock_fp == TRUE && lsp != NULL)
2779 		rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2780 
2781 	return (lsp);
2782 }
2783 
2784 
2785 static uint32_t
2786 lo_state_lo_hash(void *key)
2787 {
2788 	rfs4_lo_state_t *lsp = key;
2789 
2790 	return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2791 }
2792 
2793 static bool_t
2794 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2795 {
2796 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2797 	rfs4_lo_state_t *keyp = key;
2798 
2799 	return (keyp->rls_locker == lsp->rls_locker &&
2800 	    keyp->rls_state == lsp->rls_state);
2801 }
2802 
2803 static void *
2804 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2805 {
2806 	return (u_entry);
2807 }
2808 
2809 rfs4_lo_state_t *
2810 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2811     bool_t *create)
2812 {
2813 	rfs4_lo_state_t *lsp;
2814 	rfs4_lo_state_t arg;
2815 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2816 
2817 	arg.rls_locker = lo;
2818 	arg.rls_state = sp;
2819 
2820 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2821 	    &arg, create, &arg, RFS4_DBS_VALID);
2822 
2823 	return (lsp);
2824 }
2825 
2826 static stateid_t
2827 get_stateid(id_t eid)
2828 {
2829 	stateid_t id;
2830 	nfs4_srv_t *nsrv4;
2831 
2832 	nsrv4 = nfs4_get_srv();
2833 
2834 	id.bits.boottime = nsrv4->rfs4_start_time;
2835 	id.bits.ident = eid;
2836 	id.bits.chgseq = 0;
2837 	id.bits.type = 0;
2838 	id.bits.pid = 0;
2839 
2840 	/*
2841 	 * If we are booted as a cluster node, embed our nodeid.
2842 	 * We've already done sanity checks in rfs4_client_create() so no
2843 	 * need to repeat them here.
2844 	 */
2845 	id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2846 	    clconf_get_nodeid() : 0;
2847 
2848 	return (id);
2849 }
2850 
2851 /*
2852  * For use only when booted as a cluster node.
2853  * Returns TRUE if the embedded nodeid indicates that this stateid was
2854  * generated on another node.
2855  */
2856 static int
2857 foreign_stateid(stateid_t *id)
2858 {
2859 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2860 	return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2861 }
2862 
2863 /*
2864  * For use only when booted as a cluster node.
2865  * Returns TRUE if the embedded nodeid indicates that this clientid was
2866  * generated on another node.
2867  */
2868 static int
2869 foreign_clientid(cid *cidp)
2870 {
2871 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2872 	return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2873 	    (uint32_t)clconf_get_nodeid());
2874 }
2875 
2876 /*
2877  * For use only when booted as a cluster node.
2878  * Embed our cluster nodeid into the clientid.
2879  */
2880 static void
2881 embed_nodeid(cid *cidp)
2882 {
2883 	int clnodeid;
2884 	/*
2885 	 * Currently, our state tables are small enough that their
2886 	 * ids will leave enough bits free for the nodeid. If the
2887 	 * tables become larger, we mustn't overwrite the id.
2888 	 * Equally, we only have room for so many bits of nodeid, so
2889 	 * must check that too.
2890 	 */
2891 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2892 	ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2893 	clnodeid = clconf_get_nodeid();
2894 	ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2895 	ASSERT(clnodeid != NODEID_UNKNOWN);
2896 	cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2897 }
2898 
2899 static uint32_t
2900 state_hash(void *key)
2901 {
2902 	stateid_t *ip = (stateid_t *)key;
2903 
2904 	return (ip->bits.ident);
2905 }
2906 
2907 static bool_t
2908 state_compare(rfs4_entry_t u_entry, void *key)
2909 {
2910 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2911 	stateid_t *id = (stateid_t *)key;
2912 	bool_t rc;
2913 
2914 	rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2915 	    sp->rs_stateid.bits.ident == id->bits.ident);
2916 
2917 	return (rc);
2918 }
2919 
2920 static void *
2921 state_mkkey(rfs4_entry_t u_entry)
2922 {
2923 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2924 
2925 	return (&sp->rs_stateid);
2926 }
2927 
2928 static void
2929 rfs4_state_destroy(rfs4_entry_t u_entry)
2930 {
2931 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2932 
2933 	/* remove from openowner list */
2934 	rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2935 	list_remove(&sp->rs_owner->ro_statelist, sp);
2936 	rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2937 
2938 	list_destroy(&sp->rs_lostatelist);
2939 
2940 	/* release any share locks for this stateid if it's still open */
2941 	if (!sp->rs_closed) {
2942 		rfs4_dbe_lock(sp->rs_dbe);
2943 		(void) rfs4_unshare(sp);
2944 		rfs4_dbe_unlock(sp->rs_dbe);
2945 	}
2946 
2947 	/* Were done with the file */
2948 	rfs4_file_rele(sp->rs_finfo);
2949 	sp->rs_finfo = NULL;
2950 
2951 	/* And now with the openowner */
2952 	rfs4_openowner_rele(sp->rs_owner);
2953 	sp->rs_owner = NULL;
2954 }
2955 
2956 static void
2957 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2958 {
2959 	rfs4_dbe_rele(sp->rs_dbe);
2960 }
2961 
2962 void
2963 rfs4_state_rele(rfs4_state_t *sp)
2964 {
2965 	rw_exit(&sp->rs_finfo->rf_file_rwlock);
2966 	rfs4_dbe_rele(sp->rs_dbe);
2967 }
2968 
2969 static uint32_t
2970 deleg_hash(void *key)
2971 {
2972 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2973 
2974 	return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2975 }
2976 
2977 static bool_t
2978 deleg_compare(rfs4_entry_t u_entry, void *key)
2979 {
2980 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2981 	rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2982 
2983 	return (dsp->rds_client == kdsp->rds_client &&
2984 	    dsp->rds_finfo == kdsp->rds_finfo);
2985 }
2986 
2987 static void *
2988 deleg_mkkey(rfs4_entry_t u_entry)
2989 {
2990 	return (u_entry);
2991 }
2992 
2993 static uint32_t
2994 deleg_state_hash(void *key)
2995 {
2996 	stateid_t *ip = (stateid_t *)key;
2997 
2998 	return (ip->bits.ident);
2999 }
3000 
3001 static bool_t
3002 deleg_state_compare(rfs4_entry_t u_entry, void *key)
3003 {
3004 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3005 	stateid_t *id = (stateid_t *)key;
3006 	bool_t rc;
3007 
3008 	if (id->bits.type != DELEGID)
3009 		return (FALSE);
3010 
3011 	rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
3012 	    dsp->rds_delegid.bits.ident == id->bits.ident);
3013 
3014 	return (rc);
3015 }
3016 
3017 static void *
3018 deleg_state_mkkey(rfs4_entry_t u_entry)
3019 {
3020 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3021 
3022 	return (&dsp->rds_delegid);
3023 }
3024 
3025 static bool_t
3026 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3027 {
3028 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3029 
3030 	if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3031 		return (TRUE);
3032 
3033 	if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3034 		return (TRUE);
3035 
3036 	if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3037 	    > rfs4_lease_time)) {
3038 		rfs4_dbe_invalidate(dsp->rds_dbe);
3039 		return (TRUE);
3040 	}
3041 
3042 	return (FALSE);
3043 }
3044 
3045 static bool_t
3046 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3047 {
3048 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3049 	rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3050 	rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3051 
3052 	rfs4_dbe_hold(fp->rf_dbe);
3053 	rfs4_dbe_hold(cp->rc_dbe);
3054 
3055 	dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3056 	dsp->rds_delegid.bits.type = DELEGID;
3057 	dsp->rds_finfo = fp;
3058 	dsp->rds_client = cp;
3059 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
3060 
3061 	dsp->rds_time_granted = gethrestime_sec();	/* observability */
3062 	dsp->rds_time_revoked = 0;
3063 
3064 	list_link_init(&dsp->rds_node);
3065 
3066 	return (TRUE);
3067 }
3068 
3069 static void
3070 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3071 {
3072 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3073 
3074 	/* return delegation if necessary */
3075 	rfs4_return_deleg(dsp, FALSE);
3076 
3077 	/* Were done with the file */
3078 	rfs4_file_rele(dsp->rds_finfo);
3079 	dsp->rds_finfo = NULL;
3080 
3081 	/* And now with the openowner */
3082 	rfs4_client_rele(dsp->rds_client);
3083 	dsp->rds_client = NULL;
3084 }
3085 
3086 rfs4_deleg_state_t *
3087 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3088 {
3089 	rfs4_deleg_state_t ds, *dsp;
3090 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3091 
3092 	ds.rds_client = sp->rs_owner->ro_client;
3093 	ds.rds_finfo = sp->rs_finfo;
3094 
3095 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3096 	    create, &ds, RFS4_DBS_VALID);
3097 
3098 	return (dsp);
3099 }
3100 
3101 rfs4_deleg_state_t *
3102 rfs4_finddelegstate(stateid_t *id)
3103 {
3104 	rfs4_deleg_state_t *dsp;
3105 	bool_t create = FALSE;
3106 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3107 
3108 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3109 	    id, &create, NULL, RFS4_DBS_VALID);
3110 
3111 	return (dsp);
3112 }
3113 
3114 void
3115 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3116 {
3117 	rfs4_dbe_rele(dsp->rds_dbe);
3118 }
3119 
3120 void
3121 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3122 {
3123 
3124 	rfs4_dbe_lock(lsp->rls_dbe);
3125 
3126 	/*
3127 	 * If we are skipping sequence id checking, this means that
3128 	 * this is the first lock request and therefore the sequence
3129 	 * id does not need to be updated.  This only happens on the
3130 	 * first lock request for a lockowner
3131 	 */
3132 	if (!lsp->rls_skip_seqid_check)
3133 		lsp->rls_seqid++;
3134 
3135 	rfs4_dbe_unlock(lsp->rls_dbe);
3136 }
3137 
3138 void
3139 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3140 {
3141 
3142 	rfs4_dbe_lock(lsp->rls_dbe);
3143 
3144 	rfs4_free_reply(&lsp->rls_reply);
3145 
3146 	rfs4_copy_reply(&lsp->rls_reply, resp);
3147 
3148 	rfs4_dbe_unlock(lsp->rls_dbe);
3149 }
3150 
3151 void
3152 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3153     bool_t close_of_client)
3154 {
3155 	rfs4_state_t *sp;
3156 
3157 	rfs4_dbe_lock(oo->ro_dbe);
3158 
3159 	for (sp = list_head(&oo->ro_statelist); sp != NULL;
3160 	    sp = list_next(&oo->ro_statelist, sp)) {
3161 		rfs4_state_close(sp, FALSE, close_of_client, CRED());
3162 		if (invalidate == TRUE)
3163 			rfs4_dbe_invalidate(sp->rs_dbe);
3164 	}
3165 
3166 	rfs4_dbe_invalidate(oo->ro_dbe);
3167 	rfs4_dbe_unlock(oo->ro_dbe);
3168 }
3169 
3170 static uint32_t
3171 state_owner_file_hash(void *key)
3172 {
3173 	rfs4_state_t *sp = key;
3174 
3175 	return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3176 }
3177 
3178 static bool_t
3179 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3180 {
3181 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3182 	rfs4_state_t *arg = key;
3183 
3184 	if (sp->rs_closed == TRUE)
3185 		return (FALSE);
3186 
3187 	return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3188 }
3189 
3190 static void *
3191 state_owner_file_mkkey(rfs4_entry_t u_entry)
3192 {
3193 	return (u_entry);
3194 }
3195 
3196 static uint32_t
3197 state_file_hash(void *key)
3198 {
3199 	return (ADDRHASH(key));
3200 }
3201 
3202 static bool_t
3203 state_file_compare(rfs4_entry_t u_entry, void *key)
3204 {
3205 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3206 	rfs4_file_t *fp = key;
3207 
3208 	if (sp->rs_closed == TRUE)
3209 		return (FALSE);
3210 
3211 	return (fp == sp->rs_finfo);
3212 }
3213 
3214 static void *
3215 state_file_mkkey(rfs4_entry_t u_entry)
3216 {
3217 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3218 
3219 	return (sp->rs_finfo);
3220 }
3221 
3222 rfs4_state_t *
3223 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3224     bool_t *create)
3225 {
3226 	rfs4_state_t *sp;
3227 	rfs4_state_t key;
3228 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3229 
3230 	key.rs_owner = oo;
3231 	key.rs_finfo = fp;
3232 
3233 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3234 	    &key, create, &key, RFS4_DBS_VALID);
3235 
3236 	return (sp);
3237 }
3238 
3239 /* This returns ANY state struct that refers to this file */
3240 static rfs4_state_t *
3241 rfs4_findstate_by_file(rfs4_file_t *fp)
3242 {
3243 	bool_t create = FALSE;
3244 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3245 
3246 	return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3247 	    &create, fp, RFS4_DBS_VALID));
3248 }
3249 
3250 static bool_t
3251 rfs4_state_expiry(rfs4_entry_t u_entry)
3252 {
3253 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3254 
3255 	if (rfs4_dbe_is_invalid(sp->rs_dbe))
3256 		return (TRUE);
3257 
3258 	if (sp->rs_closed == TRUE &&
3259 	    ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3260 	    > rfs4_lease_time))
3261 		return (TRUE);
3262 
3263 	return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3264 	    > rfs4_lease_time));
3265 }
3266 
3267 static bool_t
3268 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3269 {
3270 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3271 	rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3272 	rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3273 
3274 	rfs4_dbe_hold(fp->rf_dbe);
3275 	rfs4_dbe_hold(oo->ro_dbe);
3276 	sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3277 	sp->rs_stateid.bits.type = OPENID;
3278 	sp->rs_owner = oo;
3279 	sp->rs_finfo = fp;
3280 
3281 	list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3282 	    offsetof(rfs4_lo_state_t, rls_node));
3283 
3284 	/* Insert state on per open owner's list */
3285 	rfs4_dbe_lock(oo->ro_dbe);
3286 	list_insert_tail(&oo->ro_statelist, sp);
3287 	rfs4_dbe_unlock(oo->ro_dbe);
3288 
3289 	return (TRUE);
3290 }
3291 
3292 static rfs4_state_t *
3293 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3294 {
3295 	rfs4_state_t *sp;
3296 	bool_t create = FALSE;
3297 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3298 
3299 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3300 	    &create, NULL, find_invalid);
3301 	if (lock_fp == TRUE && sp != NULL)
3302 		rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3303 
3304 	return (sp);
3305 }
3306 
3307 void
3308 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3309     cred_t *cr)
3310 {
3311 	/* Remove the associated lo_state owners */
3312 	if (!lock_held)
3313 		rfs4_dbe_lock(sp->rs_dbe);
3314 
3315 	/*
3316 	 * If refcnt == 0, the dbe is about to be destroyed.
3317 	 * lock state will be released by the reaper thread.
3318 	 */
3319 
3320 	if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3321 		if (sp->rs_closed == FALSE) {
3322 			rfs4_release_share_lock_state(sp, cr, close_of_client);
3323 			sp->rs_closed = TRUE;
3324 		}
3325 	}
3326 
3327 	if (!lock_held)
3328 		rfs4_dbe_unlock(sp->rs_dbe);
3329 }
3330 
3331 /*
3332  * Remove all state associated with the given client.
3333  */
3334 void
3335 rfs4_client_state_remove(rfs4_client_t *cp)
3336 {
3337 	rfs4_openowner_t *oo;
3338 
3339 	rfs4_dbe_lock(cp->rc_dbe);
3340 
3341 	for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3342 	    oo = list_next(&cp->rc_openownerlist, oo)) {
3343 		rfs4_free_opens(oo, TRUE, TRUE);
3344 	}
3345 
3346 	rfs4_dbe_unlock(cp->rc_dbe);
3347 }
3348 
3349 void
3350 rfs4_client_close(rfs4_client_t *cp)
3351 {
3352 	/* Mark client as going away. */
3353 	rfs4_dbe_lock(cp->rc_dbe);
3354 	rfs4_dbe_invalidate(cp->rc_dbe);
3355 	rfs4_dbe_unlock(cp->rc_dbe);
3356 
3357 	rfs4_client_state_remove(cp);
3358 
3359 	/* Release the client */
3360 	rfs4_client_rele(cp);
3361 }
3362 
3363 nfsstat4
3364 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3365 {
3366 	cid *cidp = (cid *) cp;
3367 	nfs4_srv_t *nsrv4;
3368 
3369 	nsrv4 = nfs4_get_srv();
3370 
3371 	/*
3372 	 * If we are booted as a cluster node, check the embedded nodeid.
3373 	 * If it indicates that this clientid was generated on another node,
3374 	 * inform the client accordingly.
3375 	 */
3376 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3377 		return (NFS4ERR_STALE_CLIENTID);
3378 
3379 	/*
3380 	 * If the server start time matches the time provided
3381 	 * by the client (via the clientid) and this is NOT a
3382 	 * setclientid_confirm then return EXPIRED.
3383 	 */
3384 	if (!setclid_confirm &&
3385 	    cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3386 		return (NFS4ERR_EXPIRED);
3387 
3388 	return (NFS4ERR_STALE_CLIENTID);
3389 }
3390 
3391 /*
3392  * This is used when a stateid has not been found amongst the
3393  * current server's state.  Check the stateid to see if it
3394  * was from this server instantiation or not.
3395  */
3396 static nfsstat4
3397 what_stateid_error(stateid_t *id, stateid_type_t type)
3398 {
3399 	nfs4_srv_t *nsrv4;
3400 
3401 	nsrv4 = nfs4_get_srv();
3402 
3403 	/* If we are booted as a cluster node, was stateid locally generated? */
3404 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3405 		return (NFS4ERR_STALE_STATEID);
3406 
3407 	/* If types don't match then no use checking further */
3408 	if (type != id->bits.type)
3409 		return (NFS4ERR_BAD_STATEID);
3410 
3411 	/* From a different server instantiation, return STALE */
3412 	if (id->bits.boottime != nsrv4->rfs4_start_time)
3413 		return (NFS4ERR_STALE_STATEID);
3414 
3415 	/*
3416 	 * From this server but the state is most likely beyond lease
3417 	 * timeout: return NFS4ERR_EXPIRED.  However, there is the
3418 	 * case of a delegation stateid.  For delegations, there is a
3419 	 * case where the state can be removed without the client's
3420 	 * knowledge/consent: revocation.  In the case of delegation
3421 	 * revocation, the delegation state will be removed and will
3422 	 * not be found.  If the client does something like a
3423 	 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3424 	 * that has been revoked, the server should return BAD_STATEID
3425 	 * instead of the more common EXPIRED error.
3426 	 */
3427 	if (id->bits.boottime == nsrv4->rfs4_start_time) {
3428 		if (type == DELEGID)
3429 			return (NFS4ERR_BAD_STATEID);
3430 		else
3431 			return (NFS4ERR_EXPIRED);
3432 	}
3433 
3434 	return (NFS4ERR_BAD_STATEID);
3435 }
3436 
3437 /*
3438  * Used later on to find the various state structs.  When called from
3439  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3440  * taken (it is not needed) and helps on the read/write path with
3441  * respect to performance.
3442  */
3443 static nfsstat4
3444 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3445     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3446 {
3447 	stateid_t *id = (stateid_t *)stateid;
3448 	rfs4_state_t *sp;
3449 
3450 	*spp = NULL;
3451 
3452 	/* If we are booted as a cluster node, was stateid locally generated? */
3453 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3454 		return (NFS4ERR_STALE_STATEID);
3455 
3456 	sp = rfs4_findstate(id, find_invalid, lock_fp);
3457 	if (sp == NULL) {
3458 		return (what_stateid_error(id, OPENID));
3459 	}
3460 
3461 	if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3462 		if (lock_fp == TRUE)
3463 			rfs4_state_rele(sp);
3464 		else
3465 			rfs4_state_rele_nounlock(sp);
3466 		return (NFS4ERR_EXPIRED);
3467 	}
3468 
3469 	*spp = sp;
3470 
3471 	return (NFS4_OK);
3472 }
3473 
3474 nfsstat4
3475 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3476     rfs4_dbsearch_type_t find_invalid)
3477 {
3478 	return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3479 }
3480 
3481 int
3482 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3483 {
3484 	stateid_t *id = (stateid_t *)stateid;
3485 
3486 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
3487 		return (NFS4_CHECK_STATEID_EXPIRED);
3488 
3489 	/* Stateid is some time in the future - that's bad */
3490 	if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3491 		return (NFS4_CHECK_STATEID_BAD);
3492 
3493 	if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3494 		return (NFS4_CHECK_STATEID_REPLAY);
3495 
3496 	/* Stateid is some time in the past - that's old */
3497 	if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3498 		return (NFS4_CHECK_STATEID_OLD);
3499 
3500 	/* Caller needs to know about confirmation before closure */
3501 	if (sp->rs_owner->ro_need_confirm)
3502 		return (NFS4_CHECK_STATEID_UNCONFIRMED);
3503 
3504 	if (sp->rs_closed == TRUE)
3505 		return (NFS4_CHECK_STATEID_CLOSED);
3506 
3507 	return (NFS4_CHECK_STATEID_OKAY);
3508 }
3509 
3510 int
3511 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3512 {
3513 	stateid_t *id = (stateid_t *)stateid;
3514 
3515 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3516 		return (NFS4_CHECK_STATEID_EXPIRED);
3517 
3518 	/* Stateid is some time in the future - that's bad */
3519 	if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3520 		return (NFS4_CHECK_STATEID_BAD);
3521 
3522 	if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3523 		return (NFS4_CHECK_STATEID_REPLAY);
3524 
3525 	/* Stateid is some time in the past - that's old */
3526 	if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3527 		return (NFS4_CHECK_STATEID_OLD);
3528 
3529 	if (lsp->rls_state->rs_closed == TRUE)
3530 		return (NFS4_CHECK_STATEID_CLOSED);
3531 
3532 	return (NFS4_CHECK_STATEID_OKAY);
3533 }
3534 
3535 nfsstat4
3536 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3537 {
3538 	stateid_t *id = (stateid_t *)stateid;
3539 	rfs4_deleg_state_t *dsp;
3540 
3541 	*dspp = NULL;
3542 
3543 	/* If we are booted as a cluster node, was stateid locally generated? */
3544 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3545 		return (NFS4ERR_STALE_STATEID);
3546 
3547 	dsp = rfs4_finddelegstate(id);
3548 	if (dsp == NULL) {
3549 		return (what_stateid_error(id, DELEGID));
3550 	}
3551 
3552 	if (rfs4_lease_expired(dsp->rds_client)) {
3553 		rfs4_deleg_state_rele(dsp);
3554 		return (NFS4ERR_EXPIRED);
3555 	}
3556 
3557 	*dspp = dsp;
3558 
3559 	return (NFS4_OK);
3560 }
3561 
3562 nfsstat4
3563 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3564 {
3565 	stateid_t *id = (stateid_t *)stateid;
3566 	rfs4_lo_state_t *lsp;
3567 
3568 	*lspp = NULL;
3569 
3570 	/* If we are booted as a cluster node, was stateid locally generated? */
3571 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3572 		return (NFS4ERR_STALE_STATEID);
3573 
3574 	lsp = rfs4_findlo_state(id, lock_fp);
3575 	if (lsp == NULL) {
3576 		return (what_stateid_error(id, LOCKID));
3577 	}
3578 
3579 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3580 		rfs4_lo_state_rele(lsp, lock_fp);
3581 		return (NFS4ERR_EXPIRED);
3582 	}
3583 
3584 	*lspp = lsp;
3585 
3586 	return (NFS4_OK);
3587 }
3588 
3589 static nfsstat4
3590 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3591     rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3592 {
3593 	rfs4_state_t *sp = NULL;
3594 	rfs4_deleg_state_t *dsp = NULL;
3595 	rfs4_lo_state_t *lsp = NULL;
3596 	stateid_t *id;
3597 	nfsstat4 status;
3598 
3599 	*spp = NULL; *dspp = NULL; *lspp = NULL;
3600 
3601 	id = (stateid_t *)sid;
3602 	switch (id->bits.type) {
3603 	case OPENID:
3604 		status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3605 		break;
3606 	case DELEGID:
3607 		status = rfs4_get_deleg_state(sid, &dsp);
3608 		break;
3609 	case LOCKID:
3610 		status = rfs4_get_lo_state(sid, &lsp, FALSE);
3611 		if (status == NFS4_OK) {
3612 			sp = lsp->rls_state;
3613 			rfs4_dbe_hold(sp->rs_dbe);
3614 		}
3615 		break;
3616 	default:
3617 		status = NFS4ERR_BAD_STATEID;
3618 	}
3619 
3620 	if (status == NFS4_OK) {
3621 		*spp = sp;
3622 		*dspp = dsp;
3623 		*lspp = lsp;
3624 	}
3625 
3626 	return (status);
3627 }
3628 
3629 /*
3630  * Given the I/O mode (FREAD or FWRITE), this checks whether the
3631  * rfs4_state_t struct has access to do this operation and if so
3632  * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3633  */
3634 nfsstat4
3635 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3636 {
3637 	nfsstat4 stat = NFS4_OK;
3638 	rfs4_file_t *fp;
3639 	bool_t create = FALSE;
3640 
3641 	rfs4_dbe_lock(sp->rs_dbe);
3642 	if (mode == FWRITE) {
3643 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3644 			stat = NFS4ERR_OPENMODE;
3645 		}
3646 	} else if (mode == FREAD) {
3647 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3648 			/*
3649 			 * If we have OPENed the file with DENYing access
3650 			 * to both READ and WRITE then no one else could
3651 			 * have OPENed the file, hence no conflicting READ
3652 			 * deny.  This check is merely an optimization.
3653 			 */
3654 			if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3655 				goto out;
3656 
3657 			/* Check against file struct's DENY mode */
3658 			fp = rfs4_findfile(vp, NULL, &create);
3659 			if (fp != NULL) {
3660 				int deny_read = 0;
3661 				rfs4_dbe_lock(fp->rf_dbe);
3662 				/*
3663 				 * Check if any other open owner has the file
3664 				 * OPENed with deny READ.
3665 				 */
3666 				if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3667 					deny_read = 1;
3668 				ASSERT(fp->rf_deny_read >= deny_read);
3669 				if (fp->rf_deny_read > deny_read)
3670 					stat = NFS4ERR_OPENMODE;
3671 				rfs4_dbe_unlock(fp->rf_dbe);
3672 				rfs4_file_rele(fp);
3673 			}
3674 		}
3675 	} else {
3676 		/* Illegal I/O mode */
3677 		stat = NFS4ERR_INVAL;
3678 	}
3679 out:
3680 	rfs4_dbe_unlock(sp->rs_dbe);
3681 	return (stat);
3682 }
3683 
3684 /*
3685  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3686  * the file is being truncated, return NFS4_OK if allowed or appropriate
3687  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3688  * the associated file will be done if the I/O is not consistent with any
3689  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3690  * as reader or writer as appropriate. rfs4_op_open will acquire the
3691  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3692  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3693  * deleg parameter, we will return whether a write delegation is held by
3694  * the client associated with this stateid.
3695  * If the server instance associated with the relevant client is in its
3696  * grace period, return NFS4ERR_GRACE.
3697  */
3698 
3699 nfsstat4
3700 rfs4_check_stateid(int mode, vnode_t *vp,
3701     stateid4 *stateid, bool_t trunc, bool_t *deleg,
3702     bool_t do_access, caller_context_t *ct)
3703 {
3704 	rfs4_file_t *fp;
3705 	bool_t create = FALSE;
3706 	rfs4_state_t *sp;
3707 	rfs4_deleg_state_t *dsp;
3708 	rfs4_lo_state_t *lsp;
3709 	stateid_t *id = (stateid_t *)stateid;
3710 	nfsstat4 stat = NFS4_OK;
3711 
3712 	if (ct != NULL) {
3713 		ct->cc_sysid = 0;
3714 		ct->cc_pid = 0;
3715 		ct->cc_caller_id = nfs4_srv_caller_id;
3716 		ct->cc_flags = CC_DONTBLOCK;
3717 	}
3718 
3719 	if (ISSPECIAL(stateid)) {
3720 		fp = rfs4_findfile(vp, NULL, &create);
3721 		if (fp == NULL)
3722 			return (NFS4_OK);
3723 		if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3724 			rfs4_file_rele(fp);
3725 			return (NFS4_OK);
3726 		}
3727 		if (mode == FWRITE ||
3728 		    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3729 			rfs4_recall_deleg(fp, trunc, NULL);
3730 			rfs4_file_rele(fp);
3731 			return (NFS4ERR_DELAY);
3732 		}
3733 		rfs4_file_rele(fp);
3734 		return (NFS4_OK);
3735 	} else {
3736 		stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3737 		if (stat != NFS4_OK)
3738 			return (stat);
3739 		if (lsp != NULL) {
3740 			/* Is associated server instance in its grace period? */
3741 			if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3742 				rfs4_lo_state_rele(lsp, FALSE);
3743 				if (sp != NULL)
3744 					rfs4_state_rele_nounlock(sp);
3745 				return (NFS4ERR_GRACE);
3746 			}
3747 			if (id->bits.type == LOCKID) {
3748 				/* Seqid in the future? - that's bad */
3749 				if (lsp->rls_lockid.bits.chgseq <
3750 				    id->bits.chgseq) {
3751 					rfs4_lo_state_rele(lsp, FALSE);
3752 					if (sp != NULL)
3753 						rfs4_state_rele_nounlock(sp);
3754 					return (NFS4ERR_BAD_STATEID);
3755 				}
3756 				/* Seqid in the past? - that's old */
3757 				if (lsp->rls_lockid.bits.chgseq >
3758 				    id->bits.chgseq) {
3759 					rfs4_lo_state_rele(lsp, FALSE);
3760 					if (sp != NULL)
3761 						rfs4_state_rele_nounlock(sp);
3762 					return (NFS4ERR_OLD_STATEID);
3763 				}
3764 				/* Ensure specified filehandle matches */
3765 				if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3766 					rfs4_lo_state_rele(lsp, FALSE);
3767 					if (sp != NULL)
3768 						rfs4_state_rele_nounlock(sp);
3769 					return (NFS4ERR_BAD_STATEID);
3770 				}
3771 			}
3772 			if (ct != NULL) {
3773 				ct->cc_sysid =
3774 				    lsp->rls_locker->rl_client->rc_sysidt;
3775 				ct->cc_pid = lsp->rls_locker->rl_pid;
3776 			}
3777 			rfs4_lo_state_rele(lsp, FALSE);
3778 		}
3779 
3780 		/* Stateid provided was an "open" stateid */
3781 		if (sp != NULL) {
3782 			/* Is associated server instance in its grace period? */
3783 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3784 				rfs4_state_rele_nounlock(sp);
3785 				return (NFS4ERR_GRACE);
3786 			}
3787 			if (id->bits.type == OPENID) {
3788 				/* Seqid in the future? - that's bad */
3789 				if (sp->rs_stateid.bits.chgseq <
3790 				    id->bits.chgseq) {
3791 					rfs4_state_rele_nounlock(sp);
3792 					return (NFS4ERR_BAD_STATEID);
3793 				}
3794 				/* Seqid in the past - that's old */
3795 				if (sp->rs_stateid.bits.chgseq >
3796 				    id->bits.chgseq) {
3797 					rfs4_state_rele_nounlock(sp);
3798 					return (NFS4ERR_OLD_STATEID);
3799 				}
3800 			}
3801 			/* Ensure specified filehandle matches */
3802 			if (sp->rs_finfo->rf_vp != vp) {
3803 				rfs4_state_rele_nounlock(sp);
3804 				return (NFS4ERR_BAD_STATEID);
3805 			}
3806 
3807 			if (sp->rs_owner->ro_need_confirm) {
3808 				rfs4_state_rele_nounlock(sp);
3809 				return (NFS4ERR_BAD_STATEID);
3810 			}
3811 
3812 			if (sp->rs_closed == TRUE) {
3813 				rfs4_state_rele_nounlock(sp);
3814 				return (NFS4ERR_OLD_STATEID);
3815 			}
3816 
3817 			if (do_access)
3818 				stat = rfs4_state_has_access(sp, mode, vp);
3819 			else
3820 				stat = NFS4_OK;
3821 
3822 			/*
3823 			 * Return whether this state has write
3824 			 * delegation if desired
3825 			 */
3826 			if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3827 			    OPEN_DELEGATE_WRITE))
3828 				*deleg = TRUE;
3829 
3830 			/*
3831 			 * We got a valid stateid, so we update the
3832 			 * lease on the client. Ideally we would like
3833 			 * to do this after the calling op succeeds,
3834 			 * but for now this will be good
3835 			 * enough. Callers of this routine are
3836 			 * currently insulated from the state stuff.
3837 			 */
3838 			rfs4_update_lease(sp->rs_owner->ro_client);
3839 
3840 			/*
3841 			 * If a delegation is present on this file and
3842 			 * this is a WRITE, then update the lastwrite
3843 			 * time to indicate that activity is present.
3844 			 */
3845 			if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3846 			    OPEN_DELEGATE_WRITE &&
3847 			    mode == FWRITE) {
3848 				sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3849 				    gethrestime_sec();
3850 			}
3851 
3852 			rfs4_state_rele_nounlock(sp);
3853 
3854 			return (stat);
3855 		}
3856 
3857 		if (dsp != NULL) {
3858 			/* Is associated server instance in its grace period? */
3859 			if (rfs4_clnt_in_grace(dsp->rds_client)) {
3860 				rfs4_deleg_state_rele(dsp);
3861 				return (NFS4ERR_GRACE);
3862 			}
3863 			if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3864 				rfs4_deleg_state_rele(dsp);
3865 				return (NFS4ERR_BAD_STATEID);
3866 			}
3867 
3868 			/* Ensure specified filehandle matches */
3869 			if (dsp->rds_finfo->rf_vp != vp) {
3870 				rfs4_deleg_state_rele(dsp);
3871 				return (NFS4ERR_BAD_STATEID);
3872 			}
3873 			/*
3874 			 * Return whether this state has write
3875 			 * delegation if desired
3876 			 */
3877 			if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3878 			    OPEN_DELEGATE_WRITE))
3879 				*deleg = TRUE;
3880 
3881 			rfs4_update_lease(dsp->rds_client);
3882 
3883 			/*
3884 			 * If a delegation is present on this file and
3885 			 * this is a WRITE, then update the lastwrite
3886 			 * time to indicate that activity is present.
3887 			 */
3888 			if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3889 			    OPEN_DELEGATE_WRITE && mode == FWRITE) {
3890 				dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3891 				    gethrestime_sec();
3892 			}
3893 
3894 			/*
3895 			 * XXX - what happens if this is a WRITE and the
3896 			 * delegation type of for READ.
3897 			 */
3898 			rfs4_deleg_state_rele(dsp);
3899 
3900 			return (stat);
3901 		}
3902 		/*
3903 		 * If we got this far, something bad happened
3904 		 */
3905 		return (NFS4ERR_BAD_STATEID);
3906 	}
3907 }
3908 
3909 
3910 /*
3911  * This is a special function in that for the file struct provided the
3912  * server wants to remove/close all current state associated with the
3913  * file.  The prime use of this would be with OP_REMOVE to force the
3914  * release of state and particularly of file locks.
3915  *
3916  * There is an assumption that there is no delegations outstanding on
3917  * this file at this point.  The caller should have waited for those
3918  * to be returned or revoked.
3919  */
3920 void
3921 rfs4_close_all_state(rfs4_file_t *fp)
3922 {
3923 	rfs4_state_t *sp;
3924 
3925 	rfs4_dbe_lock(fp->rf_dbe);
3926 
3927 #ifdef DEBUG
3928 	/* only applies when server is handing out delegations */
3929 	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3930 		ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3931 #endif
3932 
3933 	/* No delegations for this file */
3934 	ASSERT(list_is_empty(&fp->rf_delegstatelist));
3935 
3936 	/* Make sure that it can not be found */
3937 	rfs4_dbe_invalidate(fp->rf_dbe);
3938 
3939 	if (fp->rf_vp == NULL) {
3940 		rfs4_dbe_unlock(fp->rf_dbe);
3941 		return;
3942 	}
3943 	rfs4_dbe_unlock(fp->rf_dbe);
3944 
3945 	/*
3946 	 * Hold as writer to prevent other server threads from
3947 	 * processing requests related to the file while all state is
3948 	 * being removed.
3949 	 */
3950 	rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3951 
3952 	/* Remove ALL state from the file */
3953 	while ((sp = rfs4_findstate_by_file(fp)) != NULL) {
3954 		rfs4_state_close(sp, FALSE, FALSE, CRED());
3955 		rfs4_state_rele_nounlock(sp);
3956 	}
3957 
3958 	/*
3959 	 * This is only safe since there are no further references to
3960 	 * the file.
3961 	 */
3962 	rfs4_dbe_lock(fp->rf_dbe);
3963 	if (fp->rf_vp) {
3964 		vnode_t *vp = fp->rf_vp;
3965 
3966 		mutex_enter(&vp->v_vsd_lock);
3967 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
3968 		mutex_exit(&vp->v_vsd_lock);
3969 		VN_RELE(vp);
3970 		fp->rf_vp = NULL;
3971 	}
3972 	rfs4_dbe_unlock(fp->rf_dbe);
3973 
3974 	/* Finally let other references to proceed */
3975 	rw_exit(&fp->rf_file_rwlock);
3976 }
3977 
3978 /*
3979  * This function is used as a target for the rfs4_dbe_walk() call
3980  * below.  The purpose of this function is to see if the
3981  * lockowner_state refers to a file that resides within the exportinfo
3982  * export.  If so, then remove the lock_owner state (file locks and
3983  * share "locks") for this object since the intent is the server is
3984  * unexporting the specified directory.  Be sure to invalidate the
3985  * object after the state has been released
3986  */
3987 static void
3988 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3989 {
3990 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3991 	struct exportinfo *exi = (struct exportinfo *)e;
3992 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
3993 	fhandle_t *efhp;
3994 
3995 	efhp = (fhandle_t *)&exi->exi_fh;
3996 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3997 
3998 	FH_TO_FMT4(efhp, exi_fhp);
3999 
4000 	finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
4001 	    rf_filehandle.nfs_fh4_val;
4002 
4003 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4004 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4005 	    exi_fhp->fh4_xlen) == 0) {
4006 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
4007 		rfs4_dbe_invalidate(lsp->rls_dbe);
4008 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
4009 	}
4010 }
4011 
4012 /*
4013  * This function is used as a target for the rfs4_dbe_walk() call
4014  * below.  The purpose of this function is to see if the state refers
4015  * to a file that resides within the exportinfo export.  If so, then
4016  * remove the open state for this object since the intent is the
4017  * server is unexporting the specified directory.  The main result for
4018  * this type of entry is to invalidate it such it will not be found in
4019  * the future.
4020  */
4021 static void
4022 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4023 {
4024 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4025 	struct exportinfo *exi = (struct exportinfo *)e;
4026 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4027 	fhandle_t *efhp;
4028 
4029 	efhp = (fhandle_t *)&exi->exi_fh;
4030 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4031 
4032 	FH_TO_FMT4(efhp, exi_fhp);
4033 
4034 	finfo_fhp =
4035 	    (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4036 
4037 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4038 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4039 	    exi_fhp->fh4_xlen) == 0) {
4040 		rfs4_state_close(sp, TRUE, FALSE, CRED());
4041 		rfs4_dbe_invalidate(sp->rs_dbe);
4042 	}
4043 }
4044 
4045 /*
4046  * This function is used as a target for the rfs4_dbe_walk() call
4047  * below.  The purpose of this function is to see if the state refers
4048  * to a file that resides within the exportinfo export.  If so, then
4049  * remove the deleg state for this object since the intent is the
4050  * server is unexporting the specified directory.  The main result for
4051  * this type of entry is to invalidate it such it will not be found in
4052  * the future.
4053  */
4054 static void
4055 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4056 {
4057 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4058 	struct exportinfo *exi = (struct exportinfo *)e;
4059 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4060 	fhandle_t *efhp;
4061 
4062 	efhp = (fhandle_t *)&exi->exi_fh;
4063 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4064 
4065 	FH_TO_FMT4(efhp, exi_fhp);
4066 
4067 	finfo_fhp =
4068 	    (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4069 
4070 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4071 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4072 	    exi_fhp->fh4_xlen) == 0) {
4073 		rfs4_dbe_invalidate(dsp->rds_dbe);
4074 	}
4075 }
4076 
4077 /*
4078  * This function is used as a target for the rfs4_dbe_walk() call
4079  * below.  The purpose of this function is to see if the state refers
4080  * to a file that resides within the exportinfo export.  If so, then
4081  * release vnode hold for this object since the intent is the server
4082  * is unexporting the specified directory.  Invalidation will prevent
4083  * this struct from being found in the future.
4084  */
4085 static void
4086 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4087 {
4088 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4089 	struct exportinfo *exi = (struct exportinfo *)e;
4090 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4091 	fhandle_t *efhp;
4092 
4093 	efhp = (fhandle_t *)&exi->exi_fh;
4094 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4095 
4096 	FH_TO_FMT4(efhp, exi_fhp);
4097 
4098 	finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4099 
4100 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4101 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4102 	    exi_fhp->fh4_xlen) == 0) {
4103 		if (fp->rf_vp) {
4104 			vnode_t *vp = fp->rf_vp;
4105 
4106 			/*
4107 			 * don't leak monitors and remove the reference
4108 			 * put on the vnode when the delegation was granted.
4109 			 */
4110 			if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4111 				(void) fem_uninstall(vp, deleg_rdops,
4112 				    (void *)fp);
4113 				vn_open_downgrade(vp, FREAD);
4114 			} else if (fp->rf_dinfo.rd_dtype ==
4115 			    OPEN_DELEGATE_WRITE) {
4116 				(void) fem_uninstall(vp, deleg_wrops,
4117 				    (void *)fp);
4118 				vn_open_downgrade(vp, FREAD|FWRITE);
4119 			}
4120 			mutex_enter(&vp->v_vsd_lock);
4121 			(void) vsd_set(vp, nfs4_srv_vkey, NULL);
4122 			mutex_exit(&vp->v_vsd_lock);
4123 			VN_RELE(vp);
4124 			fp->rf_vp = NULL;
4125 		}
4126 		rfs4_dbe_invalidate(fp->rf_dbe);
4127 	}
4128 }
4129 
4130 /*
4131  * Given a directory that is being unexported, cleanup/release all
4132  * state in the server that refers to objects residing underneath this
4133  * particular export.  The ordering of the release is important.
4134  * Lock_owner, then state and then file.
4135  *
4136  * NFS zones note: nfs_export.c:unexport() calls this from a
4137  * thread in the global zone for NGZ data structures, so we
4138  * CANNOT use zone_getspecific anywhere in this code path.
4139  */
4140 void
4141 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4142 {
4143 	nfs_globals_t *ng;
4144 	nfs4_srv_t *nsrv4;
4145 
4146 	ng = ne->ne_globals;
4147 	ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4148 	nsrv4 = ng->nfs4_srv;
4149 
4150 	mutex_enter(&nsrv4->state_lock);
4151 
4152 	if (nsrv4->nfs4_server_state == NULL) {
4153 		mutex_exit(&nsrv4->state_lock);
4154 		return;
4155 	}
4156 
4157 	rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4158 	    rfs4_lo_state_walk_callout, exi);
4159 	rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4160 	rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4161 	    rfs4_deleg_state_walk_callout, exi);
4162 	rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4163 
4164 	mutex_exit(&nsrv4->state_lock);
4165 }
4166