xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_state.c (revision 9e3493cb8a0cfe96c9aef9b7da42c6c9b5c24b43)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2018 Nexenta Systems, Inc.
28  * Copyright 2019 Nexenta by DDN, Inc.
29  */
30 
31 #include <sys/systm.h>
32 #include <sys/kmem.h>
33 #include <sys/cmn_err.h>
34 #include <sys/atomic.h>
35 #include <sys/clconf.h>
36 #include <sys/cladm.h>
37 #include <sys/flock.h>
38 #include <nfs/export.h>
39 #include <nfs/nfs.h>
40 #include <nfs/nfs4.h>
41 #include <nfs/nfssys.h>
42 #include <nfs/lm.h>
43 #include <sys/pathname.h>
44 #include <sys/sdt.h>
45 #include <sys/nvpair.h>
46 
47 extern u_longlong_t nfs4_srv_caller_id;
48 
49 extern uint_t nfs4_srv_vkey;
50 
/*
 * Special stateid whose leading field and all twelve bytes of the
 * trailing array are zero.
 */
stateid4 special0 = {
	0,
	{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
};

/*
 * Special stateid whose leading field is 0xffffffff and whose twelve
 * trailing bytes are all 0xff.
 */
stateid4 special1 = {
	0xffffffff,
	{
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff,
		(char)0xff, (char)0xff, (char)0xff, (char)0xff
	}
};


/*
 * Non-zero when stateid4_cmp() yields a non-zero result for "id" against
 * either of the two special stateids above.
 * NOTE(review): presumably stateid4_cmp() returns true on a match;
 * confirm against its definition.
 */
#define	ISSPECIAL(id)  (stateid4_cmp(id, &special0) || \
			stateid4_cmp(id, &special1))
68 
69 /* For embedding the cluster nodeid into our clientid */
70 #define	CLUSTER_NODEID_SHIFT	24
71 #define	CLUSTER_MAX_NODEID	255
72 
73 #ifdef DEBUG
74 int rfs4_debug;
75 #endif
76 
77 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
78 static uint32_t rfs4_database_debug = 0x00;
79 
80 /* CSTYLED */
81 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
82 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
83 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
84 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
85 
86 /*
87  * Couple of simple init/destroy functions for a general waiter
88  */
89 void
90 rfs4_sw_init(rfs4_state_wait_t *swp)
91 {
92 	mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
93 	cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
94 	swp->sw_active = FALSE;
95 	swp->sw_wait_count = 0;
96 }
97 
98 void
99 rfs4_sw_destroy(rfs4_state_wait_t *swp)
100 {
101 	mutex_destroy(swp->sw_cv_lock);
102 	cv_destroy(swp->sw_cv);
103 }
104 
105 void
106 rfs4_sw_enter(rfs4_state_wait_t *swp)
107 {
108 	mutex_enter(swp->sw_cv_lock);
109 	while (swp->sw_active) {
110 		swp->sw_wait_count++;
111 		cv_wait(swp->sw_cv, swp->sw_cv_lock);
112 		swp->sw_wait_count--;
113 	}
114 	ASSERT(swp->sw_active == FALSE);
115 	swp->sw_active = TRUE;
116 	mutex_exit(swp->sw_cv_lock);
117 }
118 
119 void
120 rfs4_sw_exit(rfs4_state_wait_t *swp)
121 {
122 	mutex_enter(swp->sw_cv_lock);
123 	ASSERT(swp->sw_active == TRUE);
124 	swp->sw_active = FALSE;
125 	if (swp->sw_wait_count != 0)
126 		cv_broadcast(swp->sw_cv);
127 	mutex_exit(swp->sw_cv_lock);
128 }
129 
130 static void
131 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
132 {
133 	lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
134 	lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
135 
136 	if (sres->status == NFS4ERR_DENIED) {
137 		dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
138 		bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
139 	}
140 }
141 
142 /*
143  * CPR callback id -- not related to v4 callbacks
144  */
145 static callb_id_t cpr_id = 0;
146 
147 static void
148 deep_lock_free(LOCK4res *res)
149 {
150 	lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
151 
152 	if (res->status == NFS4ERR_DENIED)
153 		kmem_free(lo->owner_val, lo->owner_len);
154 }
155 
156 static void
157 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
158 {
159 	nfsace4 *sacep, *dacep;
160 
161 	if (sres->status != NFS4_OK) {
162 		return;
163 	}
164 
165 	dres->attrset = sres->attrset;
166 
167 	switch (sres->delegation.delegation_type) {
168 	case OPEN_DELEGATE_NONE:
169 		return;
170 	case OPEN_DELEGATE_READ:
171 		sacep = &sres->delegation.open_delegation4_u.read.permissions;
172 		dacep = &dres->delegation.open_delegation4_u.read.permissions;
173 		break;
174 	case OPEN_DELEGATE_WRITE:
175 		sacep = &sres->delegation.open_delegation4_u.write.permissions;
176 		dacep = &dres->delegation.open_delegation4_u.write.permissions;
177 		break;
178 	}
179 	dacep->who.utf8string_val =
180 	    kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
181 	bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
182 	    sacep->who.utf8string_len);
183 }
184 
185 static void
186 deep_open_free(OPEN4res *res)
187 {
188 	nfsace4 *acep;
189 	if (res->status != NFS4_OK)
190 		return;
191 
192 	switch (res->delegation.delegation_type) {
193 	case OPEN_DELEGATE_NONE:
194 		return;
195 	case OPEN_DELEGATE_READ:
196 		acep = &res->delegation.open_delegation4_u.read.permissions;
197 		break;
198 	case OPEN_DELEGATE_WRITE:
199 		acep = &res->delegation.open_delegation4_u.write.permissions;
200 		break;
201 	}
202 
203 	if (acep->who.utf8string_val) {
204 		kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
205 		acep->who.utf8string_val = NULL;
206 	}
207 }
208 
209 void
210 rfs4_free_reply(nfs_resop4 *rp)
211 {
212 	switch (rp->resop) {
213 	case OP_LOCK:
214 		deep_lock_free(&rp->nfs_resop4_u.oplock);
215 		break;
216 	case OP_OPEN:
217 		deep_open_free(&rp->nfs_resop4_u.opopen);
218 	default:
219 		break;
220 	}
221 }
222 
223 void
224 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
225 {
226 	*dst = *src;
227 
228 	/* Handle responses that need deep copy */
229 	switch (src->resop) {
230 	case OP_LOCK:
231 		deep_lock_copy(&dst->nfs_resop4_u.oplock,
232 		    &src->nfs_resop4_u.oplock);
233 		break;
234 	case OP_OPEN:
235 		deep_open_copy(&dst->nfs_resop4_u.opopen,
236 		    &src->nfs_resop4_u.opopen);
237 		break;
238 	default:
239 		break;
240 	};
241 }
242 
243 /*
244  * This is the implementation of the underlying state engine. The
245  * public interface to this engine is described by
246  * nfs4_state.h. Callers to the engine should hold no state engine
247  * locks when they call in to it. If the protocol needs to lock data
248  * structures it should do so after acquiring all references to them
249  * first and then follow the following lock order:
250  *
251  *	client > openowner > state > lo_state > lockowner > file.
252  *
253  * Internally we only allow a thread to hold one hash bucket lock at a
254  * time and the lock is higher in the lock order (must be acquired
255  * first) than the data structure that is on that hash list.
256  *
257  * If a new reference was acquired by the caller, that reference needs
258  * to be released after releasing all acquired locks with the
259  * corresponding rfs4_*_rele routine.
260  */
261 
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. It will be replaced by dynamically resizable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader/writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
273 
274 #ifdef DEBUG
275 #define	TABSIZE 17
276 #else
277 #define	TABSIZE 2047
278 #endif
279 
280 #define	ADDRHASH(key) ((unsigned long)(key) >> 3)
281 
/* Parenthesized so the value survives use inside larger expressions */
#define	MAXTABSZ (1024 * 1024)
283 
284 /* The values below are rfs4_lease_time units */
285 
286 #ifdef DEBUG
287 #define	CLIENT_CACHE_TIME 1
288 #define	OPENOWNER_CACHE_TIME 1
289 #define	STATE_CACHE_TIME 1
290 #define	LO_STATE_CACHE_TIME 1
291 #define	LOCKOWNER_CACHE_TIME 1
292 #define	FILE_CACHE_TIME 3
293 #define	DELEG_STATE_CACHE_TIME 1
294 #else
295 #define	CLIENT_CACHE_TIME 10
296 #define	OPENOWNER_CACHE_TIME 5
297 #define	STATE_CACHE_TIME 1
298 #define	LO_STATE_CACHE_TIME 1
299 #define	LOCKOWNER_CACHE_TIME 3
300 #define	FILE_CACHE_TIME 40
301 #define	DELEG_STATE_CACHE_TIME 1
302 #endif
303 
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global; the tables that make use of them
 * are per zone.
 */
311 kmem_cache_t *rfs4_client_mem_cache;
312 kmem_cache_t *rfs4_clntIP_mem_cache;
313 kmem_cache_t *rfs4_openown_mem_cache;
314 kmem_cache_t *rfs4_openstID_mem_cache;
315 kmem_cache_t *rfs4_lockstID_mem_cache;
316 kmem_cache_t *rfs4_lockown_mem_cache;
317 kmem_cache_t *rfs4_file_mem_cache;
318 kmem_cache_t *rfs4_delegstID_mem_cache;
319 
320 /*
321  * NFSv4 state table functions
322  */
323 static bool_t rfs4_client_create(rfs4_entry_t, void *);
324 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
325 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
326 static void rfs4_client_destroy(rfs4_entry_t);
327 static bool_t rfs4_client_expiry(rfs4_entry_t);
328 static uint32_t clientid_hash(void *);
329 static bool_t clientid_compare(rfs4_entry_t, void *);
330 static void *clientid_mkkey(rfs4_entry_t);
331 static uint32_t nfsclnt_hash(void *);
332 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
333 static void *nfsclnt_mkkey(rfs4_entry_t);
334 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
335 static void rfs4_clntip_destroy(rfs4_entry_t);
336 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
337 static uint32_t clntip_hash(void *);
338 static bool_t clntip_compare(rfs4_entry_t, void *);
339 static void *clntip_mkkey(rfs4_entry_t);
340 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
341 static void rfs4_openowner_destroy(rfs4_entry_t);
342 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
343 static uint32_t openowner_hash(void *);
344 static bool_t openowner_compare(rfs4_entry_t, void *);
345 static void *openowner_mkkey(rfs4_entry_t);
346 static bool_t rfs4_state_create(rfs4_entry_t, void *);
347 static void rfs4_state_destroy(rfs4_entry_t);
348 static bool_t rfs4_state_expiry(rfs4_entry_t);
349 static uint32_t state_hash(void *);
350 static bool_t state_compare(rfs4_entry_t, void *);
351 static void *state_mkkey(rfs4_entry_t);
352 static uint32_t state_owner_file_hash(void *);
353 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
354 static void *state_owner_file_mkkey(rfs4_entry_t);
355 static uint32_t state_file_hash(void *);
356 static bool_t state_file_compare(rfs4_entry_t, void *);
357 static void *state_file_mkkey(rfs4_entry_t);
358 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
359 static void rfs4_lo_state_destroy(rfs4_entry_t);
360 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
361 static uint32_t lo_state_hash(void *);
362 static bool_t lo_state_compare(rfs4_entry_t, void *);
363 static void *lo_state_mkkey(rfs4_entry_t);
364 static uint32_t lo_state_lo_hash(void *);
365 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
366 static void *lo_state_lo_mkkey(rfs4_entry_t);
367 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
368 static void rfs4_lockowner_destroy(rfs4_entry_t);
369 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
370 static uint32_t lockowner_hash(void *);
371 static bool_t lockowner_compare(rfs4_entry_t, void *);
372 static void *lockowner_mkkey(rfs4_entry_t);
373 static uint32_t pid_hash(void *);
374 static bool_t pid_compare(rfs4_entry_t, void *);
375 static void *pid_mkkey(rfs4_entry_t);
376 static bool_t rfs4_file_create(rfs4_entry_t, void *);
377 static void rfs4_file_destroy(rfs4_entry_t);
378 static uint32_t file_hash(void *);
379 static bool_t file_compare(rfs4_entry_t, void *);
380 static void *file_mkkey(rfs4_entry_t);
381 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
382 static void rfs4_deleg_state_destroy(rfs4_entry_t);
383 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
384 static uint32_t deleg_hash(void *);
385 static bool_t deleg_compare(rfs4_entry_t, void *);
386 static void *deleg_mkkey(rfs4_entry_t);
387 static uint32_t deleg_state_hash(void *);
388 static bool_t deleg_state_compare(rfs4_entry_t, void *);
389 static void *deleg_state_mkkey(rfs4_entry_t);
390 
391 static void rfs4_state_rele_nounlock(rfs4_state_t *);
392 
393 static int rfs4_ss_enabled = 0;
394 
395 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
396 
397 void
398 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
399 {
400 	kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
401 }
402 
403 static rfs4_ss_pn_t *
404 rfs4_ss_pnalloc(char *dir, char *leaf)
405 {
406 	rfs4_ss_pn_t *ss_pn;
407 	int dir_len, leaf_len;
408 
409 	/*
410 	 * validate we have a resonable path
411 	 * (account for the '/' and trailing null)
412 	 */
413 	if ((dir_len = strlen(dir)) > MAXPATHLEN ||
414 	    (leaf_len = strlen(leaf)) > MAXNAMELEN ||
415 	    (dir_len + leaf_len + 2) > MAXPATHLEN) {
416 		return (NULL);
417 	}
418 
419 	ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
420 
421 	(void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
422 	/* Handy pointer to just the leaf name */
423 	ss_pn->leaf = ss_pn->pn + dir_len + 1;
424 	return (ss_pn);
425 }
426 
427 
428 /*
429  * Move the "leaf" filename from "sdir" directory
430  * to the "ddir" directory. Return the pathname of
431  * the destination unless the rename fails in which
432  * case we need to return the source pathname.
433  */
434 static rfs4_ss_pn_t *
435 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
436 {
437 	rfs4_ss_pn_t *src, *dst;
438 
439 	if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
440 		return (NULL);
441 
442 	if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
443 		rfs4_ss_pnfree(src);
444 		return (NULL);
445 	}
446 
447 	/*
448 	 * If the rename fails we shall return the src
449 	 * pathname and free the dst. Otherwise we need
450 	 * to free the src and return the dst pathanme.
451 	 */
452 	if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
453 		rfs4_ss_pnfree(dst);
454 		return (src);
455 	}
456 	rfs4_ss_pnfree(src);
457 	return (dst);
458 }
459 
460 
461 static rfs4_oldstate_t *
462 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
463 {
464 	struct uio uio;
465 	struct iovec iov[3];
466 
467 	rfs4_oldstate_t *cl_ss = NULL;
468 	vnode_t *vp;
469 	vattr_t va;
470 	uint_t id_len;
471 	int err, kill_file, file_vers;
472 
473 	if (ss_pn == NULL)
474 		return (NULL);
475 
476 	/*
477 	 * open the state file.
478 	 */
479 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
480 		return (NULL);
481 	}
482 
483 	if (vp->v_type != VREG) {
484 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
485 		VN_RELE(vp);
486 		return (NULL);
487 	}
488 
489 	err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
490 	if (err) {
491 		/*
492 		 * We don't have read access? better get the heck out.
493 		 */
494 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
495 		VN_RELE(vp);
496 		return (NULL);
497 	}
498 
499 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
500 	/*
501 	 * get the file size to do some basic validation
502 	 */
503 	va.va_mask = AT_SIZE;
504 	err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
505 
506 	kill_file = (va.va_size == 0 || va.va_size <
507 	    (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
508 
509 	if (err || kill_file) {
510 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
511 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
512 		VN_RELE(vp);
513 		if (kill_file) {
514 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
515 		}
516 		return (NULL);
517 	}
518 
519 	cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
520 
521 	/*
522 	 * build iovecs to read in the file_version, verifier and id_len
523 	 */
524 	iov[0].iov_base = (caddr_t)&file_vers;
525 	iov[0].iov_len = sizeof (int);
526 	iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
527 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
528 	iov[2].iov_base = (caddr_t)&id_len;
529 	iov[2].iov_len = sizeof (uint_t);
530 
531 	uio.uio_iov = iov;
532 	uio.uio_iovcnt = 3;
533 	uio.uio_segflg = UIO_SYSSPACE;
534 	uio.uio_loffset = 0;
535 	uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
536 
537 	if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
538 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
539 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
540 		VN_RELE(vp);
541 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
542 		return (NULL);
543 	}
544 
545 	/*
546 	 * if the file_version doesn't match or if the
547 	 * id_len is zero or the combination of the verifier,
548 	 * id_len and id_val is bigger than the file we have
549 	 * a problem. If so ditch the file.
550 	 */
551 	kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
552 	    (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
553 
554 	if (err || kill_file) {
555 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
556 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
557 		VN_RELE(vp);
558 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
559 		if (kill_file) {
560 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
561 		}
562 		return (NULL);
563 	}
564 
565 	/*
566 	 * now get the client id value
567 	 */
568 	cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
569 	iov[0].iov_base = cl_ss->cl_id4.id_val;
570 	iov[0].iov_len = id_len;
571 
572 	uio.uio_iov = iov;
573 	uio.uio_iovcnt = 1;
574 	uio.uio_segflg = UIO_SYSSPACE;
575 	uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
576 
577 	if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
578 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
579 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
580 		VN_RELE(vp);
581 		kmem_free(cl_ss->cl_id4.id_val, id_len);
582 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
583 		return (NULL);
584 	}
585 
586 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
587 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
588 	VN_RELE(vp);
589 	return (cl_ss);
590 }
591 
592 #ifdef	nextdp
593 #undef nextdp
594 #endif
595 #define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
596 
597 /*
598  * Add entries from statedir to supplied oldstate list.
599  * Optionally, move all entries from statedir -> destdir.
600  */
601 void
602 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
603 {
604 	rfs4_ss_pn_t *ss_pn;
605 	rfs4_oldstate_t *cl_ss = NULL;
606 	char	*dirt = NULL;
607 	int	err, dir_eof = 0, size = 0;
608 	vnode_t *dvp;
609 	struct iovec iov;
610 	struct uio uio;
611 	struct dirent64 *dep;
612 	offset_t dirchunk_offset = 0;
613 
614 	/*
615 	 * open the state directory
616 	 */
617 	if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
618 		return;
619 
620 	if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
621 		goto out;
622 
623 	dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
624 
625 	/*
626 	 * Get and process the directory entries
627 	 */
628 	while (!dir_eof) {
629 		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
630 		iov.iov_base = dirt;
631 		iov.iov_len = RFS4_SS_DIRSIZE;
632 		uio.uio_iov = &iov;
633 		uio.uio_iovcnt = 1;
634 		uio.uio_segflg = UIO_SYSSPACE;
635 		uio.uio_loffset = dirchunk_offset;
636 		uio.uio_resid = RFS4_SS_DIRSIZE;
637 
638 		err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
639 		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
640 		if (err)
641 			goto out;
642 
643 		size = RFS4_SS_DIRSIZE - uio.uio_resid;
644 
645 		/*
646 		 * Process all the directory entries in this
647 		 * readdir chunk
648 		 */
649 		for (dep = (struct dirent64 *)dirt; size > 0;
650 		    dep = nextdp(dep)) {
651 
652 			size -= dep->d_reclen;
653 			dirchunk_offset = dep->d_off;
654 
655 			/*
656 			 * Skip '.' and '..'
657 			 */
658 			if (NFS_IS_DOTNAME(dep->d_name))
659 				continue;
660 
661 			ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
662 			if (ss_pn == NULL)
663 				continue;
664 
665 			if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
666 				if (destdir != NULL) {
667 					rfs4_ss_pnfree(ss_pn);
668 					cl_ss->ss_pn = rfs4_ss_movestate(
669 					    statedir, destdir, dep->d_name);
670 				} else {
671 					cl_ss->ss_pn = ss_pn;
672 				}
673 				insque(cl_ss, oldstate);
674 			} else {
675 				rfs4_ss_pnfree(ss_pn);
676 			}
677 		}
678 	}
679 
680 out:
681 	(void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
682 	VN_RELE(dvp);
683 	if (dirt)
684 		kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
685 }
686 
687 static void
688 rfs4_ss_init(nfs4_srv_t *nsrv4)
689 {
690 	int npaths = 1;
691 	char *default_dss_path = NFS4_DSS_VAR_DIR;
692 
693 	/* read the default stable storage state */
694 	rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
695 
696 	rfs4_ss_enabled = 1;
697 }
698 
699 static void
700 rfs4_ss_fini(nfs4_srv_t *nsrv4)
701 {
702 	rfs4_servinst_t *sip;
703 
704 	mutex_enter(&nsrv4->servinst_lock);
705 	sip = nsrv4->nfs4_cur_servinst;
706 	while (sip != NULL) {
707 		rfs4_dss_clear_oldstate(sip);
708 		sip = sip->next;
709 	}
710 	mutex_exit(&nsrv4->servinst_lock);
711 }
712 
713 /*
714  * Remove all oldstate files referenced by this servinst.
715  */
716 static void
717 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
718 {
719 	rfs4_oldstate_t *os_head, *osp;
720 
721 	rw_enter(&sip->oldstate_lock, RW_WRITER);
722 	os_head = sip->oldstate;
723 
724 	if (os_head == NULL) {
725 		rw_exit(&sip->oldstate_lock);
726 		return;
727 	}
728 
729 	/* skip dummy entry */
730 	osp = os_head->next;
731 	while (osp != os_head) {
732 		char *leaf = osp->ss_pn->leaf;
733 		rfs4_oldstate_t *os_next;
734 
735 		rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
736 
737 		if (osp->cl_id4.id_val)
738 			kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
739 		rfs4_ss_pnfree(osp->ss_pn);
740 
741 		os_next = osp->next;
742 		remque(osp);
743 		kmem_free(osp, sizeof (rfs4_oldstate_t));
744 		osp = os_next;
745 	}
746 
747 	rw_exit(&sip->oldstate_lock);
748 }
749 
750 /*
751  * Form the state and oldstate paths, and read in the stable storage files.
752  */
753 void
754 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
755 {
756 	int i;
757 	char *state, *oldstate;
758 
759 	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
760 	oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
761 
762 	for (i = 0; i < npaths; i++) {
763 		char *path = paths[i];
764 
765 		(void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
766 		(void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
767 
768 		/*
769 		 * Populate the current server instance's oldstate list.
770 		 *
771 		 * 1. Read stable storage data from old state directory,
772 		 *    leaving its contents alone.
773 		 *
774 		 * 2. Read stable storage data from state directory,
775 		 *    and move the latter's contents to old state
776 		 *    directory.
777 		 */
778 		/* CSTYLED */
779 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, oldstate, NULL);
780 		/* CSTYLED */
781 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate, state, oldstate);
782 	}
783 
784 	kmem_free(state, MAXPATHLEN);
785 	kmem_free(oldstate, MAXPATHLEN);
786 }
787 
788 
789 /*
790  * Check if we are still in grace and if the client can be
791  * granted permission to perform reclaims.
792  */
793 void
794 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
795 {
796 	rfs4_servinst_t *sip;
797 
798 	/*
799 	 * It should be sufficient to check the oldstate data for just
800 	 * this client's instance. However, since our per-instance
801 	 * client grouping is solely temporal, HA-NFSv4 RG failover
802 	 * might result in clients of the same RG being partitioned into
803 	 * separate instances.
804 	 *
805 	 * Until the client grouping is improved, we must check the
806 	 * oldstate data for all instances with an active grace period.
807 	 *
808 	 * This also serves as the mechanism to remove stale oldstate data.
809 	 * The first time we check an instance after its grace period has
810 	 * expired, the oldstate data should be cleared.
811 	 *
812 	 * Start at the current instance, and walk the list backwards
813 	 * to the first.
814 	 */
815 	mutex_enter(&nsrv4->servinst_lock);
816 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
817 		rfs4_ss_chkclid_sip(cp, sip);
818 
819 		/* if the above check found this client, we're done */
820 		if (cp->rc_can_reclaim)
821 			break;
822 	}
823 	mutex_exit(&nsrv4->servinst_lock);
824 }
825 
826 static void
827 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
828 {
829 	rfs4_oldstate_t *osp, *os_head;
830 
831 	/* short circuit everything if this server instance has no oldstate */
832 	rw_enter(&sip->oldstate_lock, RW_READER);
833 	os_head = sip->oldstate;
834 	rw_exit(&sip->oldstate_lock);
835 	if (os_head == NULL)
836 		return;
837 
838 	/*
839 	 * If this server instance is no longer in a grace period then
840 	 * the client won't be able to reclaim. No further need for this
841 	 * instance's oldstate data, so it can be cleared.
842 	 */
843 	if (!rfs4_servinst_in_grace(sip))
844 		return;
845 
846 	/* this instance is still in grace; search for the clientid */
847 
848 	rw_enter(&sip->oldstate_lock, RW_READER);
849 
850 	os_head = sip->oldstate;
851 	/* skip dummy entry */
852 	osp = os_head->next;
853 	while (osp != os_head) {
854 		if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
855 			if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
856 			    osp->cl_id4.id_len) == 0) {
857 				cp->rc_can_reclaim = 1;
858 				break;
859 			}
860 		}
861 		osp = osp->next;
862 	}
863 
864 	rw_exit(&sip->oldstate_lock);
865 }
866 
867 /*
868  * Place client information into stable storage: 1/3.
869  * First, generate the leaf filename, from the client's IP address and
870  * the server-generated short-hand clientid.
871  */
872 void
873 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
874 {
875 	const char *kinet_ntop6(uchar_t *, char *, size_t);
876 	char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
877 	struct sockaddr *ca;
878 	uchar_t *b;
879 
880 	if (rfs4_ss_enabled == 0) {
881 		return;
882 	}
883 
884 	buf[0] = 0;
885 
886 	ca = (struct sockaddr *)&cp->rc_addr;
887 
888 	/*
889 	 * Convert the caller's IP address to a dotted string
890 	 */
891 	if (ca->sa_family == AF_INET) {
892 		b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
893 		(void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
894 		    b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
895 	} else if (ca->sa_family == AF_INET6) {
896 		struct sockaddr_in6 *sin6;
897 
898 		sin6 = (struct sockaddr_in6 *)ca;
899 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
900 		    buf, INET6_ADDRSTRLEN);
901 	}
902 
903 	(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
904 	    (longlong_t)cp->rc_clientid);
905 	rfs4_ss_clid_write(nsrv4, cp, leaf);
906 }
907 
908 /*
909  * Place client information into stable storage: 2/3.
910  * DSS: distributed stable storage: the file may need to be written to
911  * multiple directories.
912  */
913 static void
914 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
915 {
916 	rfs4_servinst_t *sip;
917 
918 	/*
919 	 * It should be sufficient to write the leaf file to (all) DSS paths
920 	 * associated with just this client's instance. However, since our
921 	 * per-instance client grouping is solely temporal, HA-NFSv4 RG
922 	 * failover might result in us losing DSS data.
923 	 *
924 	 * Until the client grouping is improved, we must write the DSS data
925 	 * to all instances' paths. Start at the current instance, and
926 	 * walk the list backwards to the first.
927 	 */
928 	mutex_enter(&nsrv4->servinst_lock);
929 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
930 		int i, npaths = sip->dss_npaths;
931 
932 		/* write the leaf file to all DSS paths */
933 		for (i = 0; i < npaths; i++) {
934 			rfs4_dss_path_t *dss_path = sip->dss_paths[i];
935 
936 			/* HA-NFSv4 path might have been failed-away from us */
937 			if (dss_path == NULL)
938 				continue;
939 
940 			rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
941 		}
942 	}
943 	mutex_exit(&nsrv4->servinst_lock);
944 }
945 
946 /*
947  * Place client information into stable storage: 3/3.
948  * Write the stable storage data to the requested file.
949  */
950 static void
951 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
952 {
953 	int ioflag;
954 	int file_vers = NFS4_SS_VERSION;
955 	size_t dirlen;
956 	struct uio uio;
957 	struct iovec iov[4];
958 	char *dir;
959 	rfs4_ss_pn_t *ss_pn;
960 	vnode_t *vp;
961 	nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
962 
963 	/* allow 2 extra bytes for '/' & NUL */
964 	dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
965 	dir = kmem_alloc(dirlen, KM_SLEEP);
966 	(void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
967 
968 	ss_pn = rfs4_ss_pnalloc(dir, leaf);
969 	/* rfs4_ss_pnalloc takes its own copy */
970 	kmem_free(dir, dirlen);
971 	if (ss_pn == NULL)
972 		return;
973 
974 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
975 	    CRCREAT, 0)) {
976 		rfs4_ss_pnfree(ss_pn);
977 		return;
978 	}
979 
980 	/*
981 	 * We need to record leaf - i.e. the filename - so that we know
982 	 * what to remove, in the future. However, the dir part of cp->ss_pn
983 	 * should never be referenced directly, since it's potentially only
984 	 * one of several paths with this leaf in it.
985 	 */
986 	if (cp->rc_ss_pn != NULL) {
987 		if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
988 			/* we've already recorded *this* leaf */
989 			rfs4_ss_pnfree(ss_pn);
990 		} else {
991 			/* replace with this leaf */
992 			rfs4_ss_pnfree(cp->rc_ss_pn);
993 			cp->rc_ss_pn = ss_pn;
994 		}
995 	} else {
996 		cp->rc_ss_pn = ss_pn;
997 	}
998 
999 	/*
1000 	 * Build a scatter list that points to the nfs_client_id4
1001 	 */
1002 	iov[0].iov_base = (caddr_t)&file_vers;
1003 	iov[0].iov_len = sizeof (int);
1004 	iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1005 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
1006 	iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1007 	iov[2].iov_len = sizeof (uint_t);
1008 	iov[3].iov_base = (caddr_t)cl_id4->id_val;
1009 	iov[3].iov_len = cl_id4->id_len;
1010 
1011 	uio.uio_iov = iov;
1012 	uio.uio_iovcnt = 4;
1013 	uio.uio_loffset = 0;
1014 	uio.uio_segflg = UIO_SYSSPACE;
1015 	uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1016 	uio.uio_resid = cl_id4->id_len + sizeof (int) +
1017 	    NFS4_VERIFIER_SIZE + sizeof (uint_t);
1018 
1019 	ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1020 	uio.uio_extflg = UIO_COPY_DEFAULT;
1021 
1022 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1023 	/* write the full client id to the file. */
1024 	(void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1025 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1026 
1027 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1028 	VN_RELE(vp);
1029 }
1030 
1031 /*
1032  * DSS: distributed stable storage.
1033  * Unpack the list of paths passed by nfsd.
1034  * Use nvlist_alloc(9F) to manage the data.
1035  * The caller is responsible for allocating and freeing the buffer.
1036  */
1037 int
1038 rfs4_dss_setpaths(char *buf, size_t buflen)
1039 {
1040 	int error;
1041 
1042 	/*
1043 	 * If this is a "warm start", i.e. we previously had DSS paths,
1044 	 * preserve the old paths.
1045 	 */
1046 	if (rfs4_dss_paths != NULL) {
1047 		/*
1048 		 * Before we lose the ptr, destroy the nvlist and pathnames
1049 		 * array from the warm start before this one.
1050 		 */
1051 		nvlist_free(rfs4_dss_oldpaths);
1052 		rfs4_dss_oldpaths = rfs4_dss_paths;
1053 	}
1054 
1055 	/* unpack the buffer into a searchable nvlist */
1056 	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1057 	if (error)
1058 		return (error);
1059 
1060 	/*
1061 	 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1062 	 * in the list, and record its location.
1063 	 */
1064 	error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1065 	    &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1066 	return (error);
1067 }
1068 
1069 /*
1070  * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1071  * to find and mark the client for forced expire.
1072  */
1073 static void
1074 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1075 {
1076 	rfs4_client_t *cp = (rfs4_client_t *)ent;
1077 	struct nfs4clrst_args *clr = arg;
1078 	struct sockaddr_in6 *ent_sin6;
1079 	struct in6_addr  clr_in6;
1080 	struct sockaddr_in  *ent_sin;
1081 	struct in_addr   clr_in;
1082 
1083 	if (clr->addr_type != cp->rc_addr.ss_family) {
1084 		return;
1085 	}
1086 
1087 	switch (clr->addr_type) {
1088 
1089 	case AF_INET6:
1090 		/* copyin the address from user space */
1091 		if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1092 			break;
1093 		}
1094 
1095 		ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1096 
1097 		/*
1098 		 * now compare, and if equivalent mark entry
1099 		 * for forced expiration
1100 		 */
1101 		if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1102 			cp->rc_forced_expire = 1;
1103 		}
1104 		break;
1105 
1106 	case AF_INET:
1107 		/* copyin the address from user space */
1108 		if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1109 			break;
1110 		}
1111 
1112 		ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1113 
1114 		/*
1115 		 * now compare, and if equivalent mark entry
1116 		 * for forced expiration
1117 		 */
1118 		if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1119 			cp->rc_forced_expire = 1;
1120 		}
1121 		break;
1122 
1123 	default:
1124 		/* force this assert to fail */
1125 		ASSERT(clr->addr_type != clr->addr_type);
1126 	}
1127 }
1128 
1129 /*
1130  * This is called from nfssys() in order to clear server state
1131  * for the specified client IP Address.
1132  */
1133 void
1134 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1135 {
1136 	nfs4_srv_t *nsrv4;
1137 	nsrv4 = nfs4_get_srv();
1138 	(void) rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1139 }
1140 
1141 /*
1142  * Used to initialize the NFSv4 server's state or database.  All of
1143  * the tables are created and timers are set.
1144  */
1145 void
1146 rfs4_state_g_init()
1147 {
1148 	extern boolean_t rfs4_cpr_callb(void *, int);
1149 	/*
1150 	 * Add a CPR callback so that we can update client
1151 	 * access times to extend the lease after a suspend
1152 	 * and resume (using the same class as rpcmod/connmgr)
1153 	 */
1154 	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1155 
1156 	/*
1157 	 * NFSv4 server state databases
1158 	 *
1159 	 * Initialized when the module is loaded and used by NFSv4 state
1160 	 * tables.  These kmem_cache free pools are used globally, the NFSv4
1161 	 * state tables which make use of these kmem_cache free pools are per
1162 	 * zone.
1163 	 *
1164 	 * initialize the global kmem_cache free pools which will be used by
1165 	 * the NFSv4 state tables.
1166 	 */
1167 	/* CSTYLED */
1168 	rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache", 2, sizeof (rfs4_client_t), 0);
1169 	/* CSTYLED */
1170 	rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache", 1, sizeof (rfs4_clntip_t), 1);
1171 	/* CSTYLED */
1172 	rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache", 1, sizeof (rfs4_openowner_t), 2);
1173 	/* CSTYLED */
1174 	rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache", 3, sizeof (rfs4_state_t), 3);
1175 	/* CSTYLED */
1176 	rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache", 3, sizeof (rfs4_lo_state_t), 4);
1177 	/* CSTYLED */
1178 	rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache", 2, sizeof (rfs4_lockowner_t), 5);
1179 	/* CSTYLED */
1180 	rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache", 1, sizeof (rfs4_file_t), 6);
1181 	/* CSTYLED */
1182 	rfs4_delegstID_mem_cache = nfs4_init_mem_cache("DelegStateID_entry_cache", 2, sizeof (rfs4_deleg_state_t), 7);
1183 
1184 	rfs4_client_clrst = rfs4_clear_client_state;
1185 }
1186 
1187 
1188 /*
1189  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1190  * and other state.
1191  */
1192 void
1193 rfs4_state_g_fini()
1194 {
1195 	int i;
1196 	/*
1197 	 * Cleanup the CPR callback.
1198 	 */
1199 	if (cpr_id)
1200 		(void) callb_delete(cpr_id);
1201 
1202 	rfs4_client_clrst = NULL;
1203 
1204 	/* free the NFSv4 state databases */
1205 	for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1206 		kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1207 		rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1208 	}
1209 
1210 	rfs4_client_mem_cache = NULL;
1211 	rfs4_clntIP_mem_cache = NULL;
1212 	rfs4_openown_mem_cache = NULL;
1213 	rfs4_openstID_mem_cache = NULL;
1214 	rfs4_lockstID_mem_cache = NULL;
1215 	rfs4_lockown_mem_cache = NULL;
1216 	rfs4_file_mem_cache = NULL;
1217 	rfs4_delegstID_mem_cache = NULL;
1218 
1219 	/* DSS: distributed stable storage */
1220 	nvlist_free(rfs4_dss_oldpaths);
1221 	nvlist_free(rfs4_dss_paths);
1222 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1223 }
1224 
1225 /*
1226  * Used to initialize the per zone NFSv4 server's state
1227  */
1228 void
1229 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1230 {
1231 	time_t start_time;
1232 	int start_grace;
1233 	char *dss_path = NFS4_DSS_VAR_DIR;
1234 
1235 	/* DSS: distributed stable storage: initialise served paths list */
1236 	nsrv4->dss_pathlist = NULL;
1237 
1238 	/*
1239 	 * Set the boot time.  If the server
1240 	 * has been restarted quickly and has had the opportunity to
1241 	 * service clients, then the start_time needs to be bumped
1242 	 * regardless.  A small window but it exists...
1243 	 */
1244 	start_time = gethrestime_sec();
1245 	if (nsrv4->rfs4_start_time < start_time)
1246 		nsrv4->rfs4_start_time = start_time;
1247 	else
1248 		nsrv4->rfs4_start_time++;
1249 
1250 	/*
1251 	 * Create the first server instance, or a new one if the server has
1252 	 * been restarted; see above comments on rfs4_start_time. Don't
1253 	 * start its grace period; that will be done later, to maximise the
1254 	 * clients' recovery window.
1255 	 */
1256 	start_grace = 0;
1257 	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1258 		int i;
1259 		char **dss_allpaths = NULL;
1260 		dss_allpaths = kmem_alloc(sizeof (char *) *
1261 		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1262 		/*
1263 		 * Add the default path into the list of paths for saving
1264 		 * state informantion.
1265 		 */
1266 		dss_allpaths[0] = dss_path;
1267 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
1268 			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1269 		}
1270 		rfs4_servinst_create(nsrv4, start_grace,
1271 		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
1272 		kmem_free(dss_allpaths,
1273 		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1274 	} else {
1275 		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1276 	}
1277 
1278 	/* reset the "first NFSv4 request" status */
1279 	nsrv4->seen_first_compound = 0;
1280 
1281 	mutex_enter(&nsrv4->state_lock);
1282 
1283 	/*
1284 	 * If the server state database has already been initialized,
1285 	 * skip it
1286 	 */
1287 	if (nsrv4->nfs4_server_state != NULL) {
1288 		mutex_exit(&nsrv4->state_lock);
1289 		return;
1290 	}
1291 
1292 	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1293 
1294 	/* set the various cache timers for table creation */
1295 	if (nsrv4->rfs4_client_cache_time == 0)
1296 		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1297 	if (nsrv4->rfs4_openowner_cache_time == 0)
1298 		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1299 	if (nsrv4->rfs4_state_cache_time == 0)
1300 		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1301 	if (nsrv4->rfs4_lo_state_cache_time == 0)
1302 		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1303 	if (nsrv4->rfs4_lockowner_cache_time == 0)
1304 		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1305 	if (nsrv4->rfs4_file_cache_time == 0)
1306 		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1307 	if (nsrv4->rfs4_deleg_state_cache_time == 0)
1308 		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1309 
1310 	/* Create the overall database to hold all server state */
1311 	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1312 
1313 	/* Now create the individual tables */
1314 	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1315 	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1316 	    "Client",
1317 	    nsrv4->rfs4_client_cache_time,
1318 	    2,
1319 	    rfs4_client_create,
1320 	    rfs4_client_destroy,
1321 	    rfs4_client_expiry,
1322 	    sizeof (rfs4_client_t),
1323 	    TABSIZE,
1324 	    MAXTABSZ/8, 100);
1325 	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1326 	    "nfs_client_id4", nfsclnt_hash,
1327 	    nfsclnt_compare, nfsclnt_mkkey,
1328 	    TRUE);
1329 	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1330 	    "client_id", clientid_hash,
1331 	    clientid_compare, clientid_mkkey,
1332 	    FALSE);
1333 
1334 	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
1335 	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1336 	    "ClntIP",
1337 	    nsrv4->rfs4_clntip_cache_time,
1338 	    1,
1339 	    rfs4_clntip_create,
1340 	    rfs4_clntip_destroy,
1341 	    rfs4_clntip_expiry,
1342 	    sizeof (rfs4_clntip_t),
1343 	    TABSIZE,
1344 	    MAXTABSZ, 100);
1345 	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1346 	    "client_ip", clntip_hash,
1347 	    clntip_compare, clntip_mkkey,
1348 	    TRUE);
1349 
1350 	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1351 	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1352 	    "OpenOwner",
1353 	    nsrv4->rfs4_openowner_cache_time,
1354 	    1,
1355 	    rfs4_openowner_create,
1356 	    rfs4_openowner_destroy,
1357 	    rfs4_openowner_expiry,
1358 	    sizeof (rfs4_openowner_t),
1359 	    TABSIZE,
1360 	    MAXTABSZ, 100);
1361 	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1362 	    "open_owner4", openowner_hash,
1363 	    openowner_compare,
1364 	    openowner_mkkey, TRUE);
1365 
1366 	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1367 	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1368 	    "OpenStateID",
1369 	    nsrv4->rfs4_state_cache_time,
1370 	    3,
1371 	    rfs4_state_create,
1372 	    rfs4_state_destroy,
1373 	    rfs4_state_expiry,
1374 	    sizeof (rfs4_state_t),
1375 	    TABSIZE,
1376 	    MAXTABSZ, 100);
1377 
1378 	/* CSTYLED */
1379 	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1380 	    "Openowner-File",
1381 	    state_owner_file_hash,
1382 	    state_owner_file_compare,
1383 	    state_owner_file_mkkey, TRUE);
1384 
1385 	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1386 	    "State-id", state_hash,
1387 	    state_compare, state_mkkey, FALSE);
1388 
1389 	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1390 	    "File", state_file_hash,
1391 	    state_file_compare, state_file_mkkey,
1392 	    FALSE);
1393 
1394 	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1395 	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1396 	    "LockStateID",
1397 	    nsrv4->rfs4_lo_state_cache_time,
1398 	    2,
1399 	    rfs4_lo_state_create,
1400 	    rfs4_lo_state_destroy,
1401 	    rfs4_lo_state_expiry,
1402 	    sizeof (rfs4_lo_state_t),
1403 	    TABSIZE,
1404 	    MAXTABSZ, 100);
1405 
1406 	/* CSTYLED */
1407 	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1408 	    "lockownerxstate",
1409 	    lo_state_lo_hash,
1410 	    lo_state_lo_compare,
1411 	    lo_state_lo_mkkey, TRUE);
1412 
1413 	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1414 	    "State-id",
1415 	    lo_state_hash, lo_state_compare,
1416 	    lo_state_mkkey, FALSE);
1417 
1418 	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1419 
1420 	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1421 	    "Lockowner",
1422 	    nsrv4->rfs4_lockowner_cache_time,
1423 	    2,
1424 	    rfs4_lockowner_create,
1425 	    rfs4_lockowner_destroy,
1426 	    rfs4_lockowner_expiry,
1427 	    sizeof (rfs4_lockowner_t),
1428 	    TABSIZE,
1429 	    MAXTABSZ, 100);
1430 
1431 	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1432 	    "lock_owner4", lockowner_hash,
1433 	    lockowner_compare,
1434 	    lockowner_mkkey, TRUE);
1435 
1436 	/* CSTYLED */
1437 	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1438 	    "pid", pid_hash,
1439 	    pid_compare, pid_mkkey,
1440 	    FALSE);
1441 
1442 	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1443 	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1444 	    "File",
1445 	    nsrv4->rfs4_file_cache_time,
1446 	    1,
1447 	    rfs4_file_create,
1448 	    rfs4_file_destroy,
1449 	    NULL,
1450 	    sizeof (rfs4_file_t),
1451 	    TABSIZE,
1452 	    MAXTABSZ, -1);
1453 
1454 	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1455 	    "Filehandle", file_hash,
1456 	    file_compare, file_mkkey, TRUE);
1457 
1458 	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1459 	/* CSTYLED */
1460 	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1461 	    "DelegStateID",
1462 	    nsrv4->rfs4_deleg_state_cache_time,
1463 	    2,
1464 	    rfs4_deleg_state_create,
1465 	    rfs4_deleg_state_destroy,
1466 	    rfs4_deleg_state_expiry,
1467 	    sizeof (rfs4_deleg_state_t),
1468 	    TABSIZE,
1469 	    MAXTABSZ, 100);
1470 	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1471 	    "DelegByFileClient",
1472 	    deleg_hash,
1473 	    deleg_compare,
1474 	    deleg_mkkey, TRUE);
1475 
1476 	/* CSTYLED */
1477 	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1478 	    "DelegState",
1479 	    deleg_state_hash,
1480 	    deleg_state_compare,
1481 	    deleg_state_mkkey, FALSE);
1482 
1483 	mutex_exit(&nsrv4->state_lock);
1484 
1485 	/*
1486 	 * Init the stable storage.
1487 	 */
1488 	rfs4_ss_init(nsrv4);
1489 }
1490 
1491 /*
1492  * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1493  * and state.
1494  */
1495 void
1496 rfs4_state_zone_fini()
1497 {
1498 	rfs4_database_t *dbp;
1499 	nfs4_srv_t *nsrv4;
1500 	nsrv4 = nfs4_get_srv();
1501 
1502 	rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1503 
1504 	/*
1505 	 * Clean up any dangling stable storage structures BEFORE calling
1506 	 * rfs4_servinst_destroy_all() so there are no dangling structures
1507 	 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1508 	 * freed).
1509 	 */
1510 	rfs4_ss_fini(nsrv4);
1511 
1512 	mutex_enter(&nsrv4->state_lock);
1513 
1514 	if (nsrv4->nfs4_server_state == NULL) {
1515 		mutex_exit(&nsrv4->state_lock);
1516 		return;
1517 	}
1518 
1519 	/* destroy server instances and current instance ptr */
1520 	rfs4_servinst_destroy_all(nsrv4);
1521 
1522 	/* reset the "first NFSv4 request" status */
1523 	nsrv4->seen_first_compound = 0;
1524 
1525 	dbp = nsrv4->nfs4_server_state;
1526 	nsrv4->nfs4_server_state = NULL;
1527 
1528 	rw_destroy(&nsrv4->rfs4_findclient_lock);
1529 
1530 	/* First stop all of the reaper threads in the database */
1531 	rfs4_database_shutdown(dbp);
1532 
1533 	/*
1534 	 * WARNING: There may be consumers of the rfs4 database still
1535 	 * active as we destroy these.  IF that's the case, consider putting
1536 	 * some of their _zone_fini()-like functions into the zsd key as
1537 	 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
1538 	 * maintain some ordering guarantees better that way.
1539 	 */
1540 	/* Now destroy/release the database tables */
1541 	rfs4_database_destroy(dbp);
1542 
1543 	/* Reset the cache timers for next time */
1544 	nsrv4->rfs4_client_cache_time = 0;
1545 	nsrv4->rfs4_openowner_cache_time = 0;
1546 	nsrv4->rfs4_state_cache_time = 0;
1547 	nsrv4->rfs4_lo_state_cache_time = 0;
1548 	nsrv4->rfs4_lockowner_cache_time = 0;
1549 	nsrv4->rfs4_file_cache_time = 0;
1550 	nsrv4->rfs4_deleg_state_cache_time = 0;
1551 
1552 	mutex_exit(&nsrv4->state_lock);
1553 }
1554 
/*
 * Implementation view of a clientid4.  rfs4_client_create() fills
 * impl_id.start_time from the server's rfs4_start_time and impl_id.c_id
 * from the id of the client's database entry; id4 is the same storage
 * seen as the protocol's clientid4.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;

/*
 * Forward declarations.  The callers below invoke foreign_clientid()
 * and embed_nodeid() only when CLUSTER_BOOTED is set in
 * cluster_bootflags.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);

/*
 * Implementation view of the SETCLIENTID_CONFIRM verifier.
 * rfs4_client_create() sets cv_impl.c_id to the client's c_id and
 * cv_impl.gen_num to 0; rfs4_client_scv_next() increments gen_num.
 * confirm_verf is the same storage seen as a verifier4.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4	confirm_verf;
} scid_confirm_verf;
1574 
1575 static uint32_t
1576 clientid_hash(void *key)
1577 {
1578 	cid *idp = key;
1579 
1580 	return (idp->impl_id.c_id);
1581 }
1582 
1583 static bool_t
1584 clientid_compare(rfs4_entry_t entry, void *key)
1585 {
1586 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1587 	clientid4 *idp = key;
1588 
1589 	return (*idp == cp->rc_clientid);
1590 }
1591 
1592 static void *
1593 clientid_mkkey(rfs4_entry_t entry)
1594 {
1595 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1596 
1597 	return (&cp->rc_clientid);
1598 }
1599 
1600 static uint32_t
1601 nfsclnt_hash(void *key)
1602 {
1603 	nfs_client_id4 *client = key;
1604 	int i;
1605 	uint32_t hash = 0;
1606 
1607 	for (i = 0; i < client->id_len; i++) {
1608 		hash <<= 1;
1609 		hash += (uint_t)client->id_val[i];
1610 	}
1611 	return (hash);
1612 }
1613 
1614 
1615 static bool_t
1616 nfsclnt_compare(rfs4_entry_t entry, void *key)
1617 {
1618 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1619 	nfs_client_id4 *nfs_client = key;
1620 
1621 	if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1622 		return (FALSE);
1623 
1624 	return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1625 	    nfs_client->id_len) == 0);
1626 }
1627 
1628 static void *
1629 nfsclnt_mkkey(rfs4_entry_t entry)
1630 {
1631 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1632 
1633 	return (&cp->rc_nfs_client);
1634 }
1635 
1636 static bool_t
1637 rfs4_client_expiry(rfs4_entry_t u_entry)
1638 {
1639 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1640 	bool_t cp_expired;
1641 
1642 	if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1643 		cp->rc_ss_remove = 1;
1644 		return (TRUE);
1645 	}
1646 	/*
1647 	 * If the sysadmin has used clear_locks for this
1648 	 * entry then forced_expire will be set and we
1649 	 * want this entry to be reaped. Or the entry
1650 	 * has exceeded its lease period.
1651 	 */
1652 	cp_expired = (cp->rc_forced_expire ||
1653 	    (gethrestime_sec() - cp->rc_last_access
1654 	    > rfs4_lease_time));
1655 
1656 	if (!cp->rc_ss_remove && cp_expired)
1657 		cp->rc_ss_remove = 1;
1658 	return (cp_expired);
1659 }
1660 
1661 /*
1662  * Remove the leaf file from all distributed stable storage paths.
1663  */
1664 static void
1665 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1666 {
1667 	nfs4_srv_t *nsrv4;
1668 	rfs4_servinst_t *sip;
1669 	char *leaf = cp->rc_ss_pn->leaf;
1670 
1671 	/*
1672 	 * since the state files are written to all DSS
1673 	 * paths we must remove this leaf file instance
1674 	 * from all server instances.
1675 	 */
1676 
1677 	nsrv4 = nfs4_get_srv();
1678 	mutex_enter(&nsrv4->servinst_lock);
1679 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1680 		/* remove the leaf file associated with this server instance */
1681 		rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1682 	}
1683 	mutex_exit(&nsrv4->servinst_lock);
1684 }
1685 
1686 static void
1687 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1688 {
1689 	int i, npaths = sip->dss_npaths;
1690 
1691 	for (i = 0; i < npaths; i++) {
1692 		rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1693 		char *path, *dir;
1694 		size_t pathlen;
1695 
1696 		/* the HA-NFSv4 path might have been failed-over away from us */
1697 		if (dss_path == NULL)
1698 			continue;
1699 
1700 		dir = dss_path->path;
1701 
1702 		/* allow 3 extra bytes for two '/' & a NUL */
1703 		pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1704 		path = kmem_alloc(pathlen, KM_SLEEP);
1705 		(void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1706 
1707 		(void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1708 
1709 		kmem_free(path, pathlen);
1710 	}
1711 }
1712 
1713 static void
1714 rfs4_client_destroy(rfs4_entry_t u_entry)
1715 {
1716 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1717 
1718 	mutex_destroy(cp->rc_cbinfo.cb_lock);
1719 	cv_destroy(cp->rc_cbinfo.cb_cv);
1720 	cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1721 	list_destroy(&cp->rc_openownerlist);
1722 
1723 	/* free callback info */
1724 	rfs4_cbinfo_free(&cp->rc_cbinfo);
1725 
1726 	if (cp->rc_cp_confirmed)
1727 		rfs4_client_rele(cp->rc_cp_confirmed);
1728 
1729 	if (cp->rc_ss_pn) {
1730 		/* check if the stable storage files need to be removed */
1731 		if (cp->rc_ss_remove)
1732 			rfs4_dss_remove_cpleaf(cp);
1733 		rfs4_ss_pnfree(cp->rc_ss_pn);
1734 	}
1735 
1736 	/* Free the client supplied client id */
1737 	kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1738 
1739 	if (cp->rc_sysidt != LM_NOSYSID)
1740 		lm_free_sysidt(cp->rc_sysidt);
1741 }
1742 
1743 static bool_t
1744 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1745 {
1746 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1747 	nfs_client_id4 *client = (nfs_client_id4 *)arg;
1748 	struct sockaddr *ca;
1749 	cid *cidp;
1750 	scid_confirm_verf *scvp;
1751 	nfs4_srv_t *nsrv4;
1752 
1753 	nsrv4 = nfs4_get_srv();
1754 
1755 	/* Get a clientid to give to the client */
1756 	cidp = (cid *)&cp->rc_clientid;
1757 	cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1758 	cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1759 
1760 	/* If we are booted as a cluster node, embed our nodeid */
1761 	if (cluster_bootflags & CLUSTER_BOOTED)
1762 		embed_nodeid(cidp);
1763 
1764 	/* Allocate and copy client's client id value */
1765 	cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1766 	cp->rc_nfs_client.id_len = client->id_len;
1767 	bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1768 	cp->rc_nfs_client.verifier = client->verifier;
1769 
1770 	/* Copy client's IP address */
1771 	ca = client->cl_addr;
1772 	if (ca->sa_family == AF_INET)
1773 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1774 	else if (ca->sa_family == AF_INET6)
1775 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1776 	cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1777 
1778 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1779 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1780 	scvp->cv_impl.c_id = cidp->impl_id.c_id;
1781 	scvp->cv_impl.gen_num = 0;
1782 
1783 	/* An F_UNLKSYS has been done for this client */
1784 	cp->rc_unlksys_completed = FALSE;
1785 
1786 	/* We need the client to ack us */
1787 	cp->rc_need_confirm = TRUE;
1788 	cp->rc_cp_confirmed = NULL;
1789 
1790 	/* TRUE all the time until the callback path actually fails */
1791 	cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1792 
1793 	/* Initialize the access time to now */
1794 	cp->rc_last_access = gethrestime_sec();
1795 
1796 	cp->rc_cr_set = NULL;
1797 
1798 	cp->rc_sysidt = LM_NOSYSID;
1799 
1800 	list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1801 	    offsetof(rfs4_openowner_t, ro_node));
1802 
1803 	/* set up the callback control structure */
1804 	cp->rc_cbinfo.cb_state = CB_UNINIT;
1805 	mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1806 	cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1807 	cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1808 
1809 	/*
1810 	 * Associate the client_t with the current server instance.
1811 	 * The hold is solely to satisfy the calling requirement of
1812 	 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1813 	 */
1814 	rfs4_dbe_hold(cp->rc_dbe);
1815 	rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1816 	rfs4_dbe_rele(cp->rc_dbe);
1817 
1818 	return (TRUE);
1819 }
1820 
1821 /*
1822  * Caller wants to generate/update the setclientid_confirm verifier
1823  * associated with a client.  This is done during the SETCLIENTID
1824  * processing.
1825  */
1826 void
1827 rfs4_client_scv_next(rfs4_client_t *cp)
1828 {
1829 	scid_confirm_verf *scvp;
1830 
1831 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1832 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1833 	scvp->cv_impl.gen_num++;
1834 }
1835 
/*
 * Release a reference on a client by releasing its database entry
 * (cp->rc_dbe).
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1841 
1842 rfs4_client_t *
1843 rfs4_findclient(nfs_client_id4 *client, bool_t *create,	rfs4_client_t *oldcp)
1844 {
1845 	rfs4_client_t *cp;
1846 	nfs4_srv_t *nsrv4;
1847 	nsrv4 = nfs4_get_srv();
1848 
1849 
1850 	if (oldcp) {
1851 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1852 		rfs4_dbe_hide(oldcp->rc_dbe);
1853 	} else {
1854 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1855 	}
1856 
1857 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1858 	    create, (void *)client, RFS4_DBS_VALID);
1859 
1860 	if (oldcp)
1861 		rfs4_dbe_unhide(oldcp->rc_dbe);
1862 
1863 	rw_exit(&nsrv4->rfs4_findclient_lock);
1864 
1865 	return (cp);
1866 }
1867 
1868 rfs4_client_t *
1869 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1870 {
1871 	rfs4_client_t *cp;
1872 	bool_t create = FALSE;
1873 	cid *cidp = (cid *)&clientid;
1874 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1875 
1876 	/* If we're a cluster and the nodeid isn't right, short-circuit */
1877 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1878 		return (NULL);
1879 
1880 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1881 
1882 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1883 	    &create, NULL, RFS4_DBS_VALID);
1884 
1885 	rw_exit(&nsrv4->rfs4_findclient_lock);
1886 
1887 	if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1888 		rfs4_client_rele(cp);
1889 		return (NULL);
1890 	} else {
1891 		return (cp);
1892 	}
1893 }
1894 
1895 static uint32_t
1896 clntip_hash(void *key)
1897 {
1898 	struct sockaddr *addr = key;
1899 	int i, len = 0;
1900 	uint32_t hash = 0;
1901 	char *ptr;
1902 
1903 	if (addr->sa_family == AF_INET) {
1904 		struct sockaddr_in *a = (struct sockaddr_in *)addr;
1905 		len = sizeof (struct in_addr);
1906 		ptr = (char *)&a->sin_addr;
1907 	} else if (addr->sa_family == AF_INET6) {
1908 		struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
1909 		len = sizeof (struct in6_addr);
1910 		ptr = (char *)&a->sin6_addr;
1911 	} else
1912 		return (0);
1913 
1914 	for (i = 0; i < len; i++) {
1915 		hash <<= 1;
1916 		hash += (uint_t)ptr[i];
1917 	}
1918 	return (hash);
1919 }
1920 
1921 static bool_t
1922 clntip_compare(rfs4_entry_t entry, void *key)
1923 {
1924 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1925 	struct sockaddr *addr = key;
1926 	int len = 0;
1927 	char *p1, *p2;
1928 
1929 	if (addr->sa_family == AF_INET) {
1930 		struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1931 		struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1932 		len = sizeof (struct in_addr);
1933 		p1 = (char *)&a1->sin_addr;
1934 		p2 = (char *)&a2->sin_addr;
1935 	} else if (addr->sa_family == AF_INET6) {
1936 		struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1937 		struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1938 		len = sizeof (struct in6_addr);
1939 		p1 = (char *)&a1->sin6_addr;
1940 		p2 = (char *)&a2->sin6_addr;
1941 	} else
1942 		return (0);
1943 
1944 	return (bcmp(p1, p2, len) == 0);
1945 }
1946 
1947 static void *
1948 clntip_mkkey(rfs4_entry_t entry)
1949 {
1950 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1951 
1952 	return (&cp->ri_addr);
1953 }
1954 
1955 static bool_t
1956 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1957 {
1958 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1959 
1960 	if (rfs4_dbe_is_invalid(cp->ri_dbe))
1961 		return (TRUE);
1962 	return (FALSE);
1963 }
1964 
/*
 * Destroy callback for the ClntIP table.  There is nothing to tear
 * down: rfs4_clntip_create() only copies an address and sets a flag.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1970 
1971 static bool_t
1972 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1973 {
1974 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1975 	struct sockaddr *ca = (struct sockaddr *)arg;
1976 
1977 	/* Copy client's IP address */
1978 	if (ca->sa_family == AF_INET)
1979 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1980 	else if (ca->sa_family == AF_INET6)
1981 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1982 	else
1983 		return (FALSE);
1984 	cp->ri_no_referrals = 1;
1985 
1986 	return (TRUE);
1987 }
1988 
1989 rfs4_clntip_t *
1990 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1991 {
1992 	rfs4_clntip_t *cp;
1993 	nfs4_srv_t *nsrv4;
1994 
1995 	nsrv4 = nfs4_get_srv();
1996 
1997 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1998 
1999 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2000 	    create, addr, RFS4_DBS_VALID);
2001 
2002 	rw_exit(&nsrv4->rfs4_findclient_lock);
2003 
2004 	return (cp);
2005 }
2006 
2007 void
2008 rfs4_invalidate_clntip(struct sockaddr *addr)
2009 {
2010 	rfs4_clntip_t *cp;
2011 	bool_t create = FALSE;
2012 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2013 
2014 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2015 
2016 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2017 	    &create, NULL, RFS4_DBS_VALID);
2018 	if (cp == NULL) {
2019 		rw_exit(&nsrv4->rfs4_findclient_lock);
2020 		return;
2021 	}
2022 	rfs4_dbe_invalidate(cp->ri_dbe);
2023 	rfs4_dbe_rele(cp->ri_dbe);
2024 
2025 	rw_exit(&nsrv4->rfs4_findclient_lock);
2026 }
2027 
2028 bool_t
2029 rfs4_lease_expired(rfs4_client_t *cp)
2030 {
2031 	bool_t rc;
2032 
2033 	rfs4_dbe_lock(cp->rc_dbe);
2034 
2035 	/*
2036 	 * If the admin has executed clear_locks for this
2037 	 * client id, force expire will be set, so no need
2038 	 * to calculate anything because it's "outa here".
2039 	 */
2040 	if (cp->rc_forced_expire) {
2041 		rc = TRUE;
2042 	} else {
2043 		rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2044 	}
2045 
2046 	/*
2047 	 * If the lease has expired we will also want
2048 	 * to remove any stable storage state data. So
2049 	 * mark the client id accordingly.
2050 	 */
2051 	if (!cp->rc_ss_remove)
2052 		cp->rc_ss_remove = (rc == TRUE);
2053 
2054 	rfs4_dbe_unlock(cp->rc_dbe);
2055 
2056 	return (rc);
2057 }
2058 
2059 void
2060 rfs4_update_lease(rfs4_client_t *cp)
2061 {
2062 	rfs4_dbe_lock(cp->rc_dbe);
2063 	if (!cp->rc_forced_expire)
2064 		cp->rc_last_access = gethrestime_sec();
2065 	rfs4_dbe_unlock(cp->rc_dbe);
2066 }
2067 
2068 
2069 static bool_t
2070 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2071 {
2072 	bool_t rc;
2073 
2074 	if (a->clientid != b->clientid)
2075 		return (FALSE);
2076 
2077 	if (a->owner_len != b->owner_len)
2078 		return (FALSE);
2079 
2080 	rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2081 
2082 	return (rc);
2083 }
2084 
2085 static uint_t
2086 openowner_hash(void *key)
2087 {
2088 	int i;
2089 	open_owner4 *openowner = key;
2090 	uint_t hash = 0;
2091 
2092 	for (i = 0; i < openowner->owner_len; i++) {
2093 		hash <<= 4;
2094 		hash += (uint_t)openowner->owner_val[i];
2095 	}
2096 	hash += (uint_t)openowner->clientid;
2097 	hash |= (openowner->clientid >> 32);
2098 
2099 	return (hash);
2100 }
2101 
2102 static bool_t
2103 openowner_compare(rfs4_entry_t u_entry, void *key)
2104 {
2105 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2106 	open_owner4 *arg = key;
2107 
2108 	return (EQOPENOWNER(&oo->ro_owner, arg));
2109 }
2110 
2111 void *
2112 openowner_mkkey(rfs4_entry_t u_entry)
2113 {
2114 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2115 
2116 	return (&oo->ro_owner);
2117 }
2118 
2119 /* ARGSUSED */
2120 static bool_t
2121 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2122 {
2123 	/* openstateid held us and did all needed delay */
2124 	return (TRUE);
2125 }
2126 
2127 static void
2128 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2129 {
2130 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2131 
2132 	/* Remove open owner from client's lists of open owners */
2133 	rfs4_dbe_lock(oo->ro_client->rc_dbe);
2134 	list_remove(&oo->ro_client->rc_openownerlist, oo);
2135 	rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2136 
2137 	/* One less reference to the client */
2138 	rfs4_client_rele(oo->ro_client);
2139 	oo->ro_client = NULL;
2140 
2141 	/* Free the last reply for this lock owner */
2142 	rfs4_free_reply(&oo->ro_reply);
2143 
2144 	if (oo->ro_reply_fh.nfs_fh4_val) {
2145 		kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2146 		    oo->ro_reply_fh.nfs_fh4_len);
2147 		oo->ro_reply_fh.nfs_fh4_val = NULL;
2148 		oo->ro_reply_fh.nfs_fh4_len = 0;
2149 	}
2150 
2151 	rfs4_sw_destroy(&oo->ro_sw);
2152 	list_destroy(&oo->ro_statelist);
2153 
2154 	/* Free the lock owner id */
2155 	kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2156 }
2157 
2158 void
2159 rfs4_openowner_rele(rfs4_openowner_t *oo)
2160 {
2161 	rfs4_dbe_rele(oo->ro_dbe);
2162 }
2163 
2164 static bool_t
2165 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2166 {
2167 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2168 	rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2169 	open_owner4 *openowner = &argp->ro_owner;
2170 	seqid4 seqid = argp->ro_open_seqid;
2171 	rfs4_client_t *cp;
2172 	bool_t create = FALSE;
2173 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2174 
2175 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2176 
2177 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2178 	    &openowner->clientid,
2179 	    &create, NULL, RFS4_DBS_VALID);
2180 
2181 	rw_exit(&nsrv4->rfs4_findclient_lock);
2182 
2183 	if (cp == NULL)
2184 		return (FALSE);
2185 
2186 	oo->ro_reply_fh.nfs_fh4_len = 0;
2187 	oo->ro_reply_fh.nfs_fh4_val = NULL;
2188 
2189 	oo->ro_owner.clientid = openowner->clientid;
2190 	oo->ro_owner.owner_val =
2191 	    kmem_alloc(openowner->owner_len, KM_SLEEP);
2192 
2193 	bcopy(openowner->owner_val,
2194 	    oo->ro_owner.owner_val, openowner->owner_len);
2195 
2196 	oo->ro_owner.owner_len = openowner->owner_len;
2197 
2198 	oo->ro_need_confirm = TRUE;
2199 
2200 	rfs4_sw_init(&oo->ro_sw);
2201 
2202 	oo->ro_open_seqid = seqid;
2203 	bzero(&oo->ro_reply, sizeof (nfs_resop4));
2204 	oo->ro_client = cp;
2205 	oo->ro_cr_set = NULL;
2206 
2207 	list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2208 	    offsetof(rfs4_state_t, rs_node));
2209 
2210 	/* Insert openowner into client's open owner list */
2211 	rfs4_dbe_lock(cp->rc_dbe);
2212 	list_insert_tail(&cp->rc_openownerlist, oo);
2213 	rfs4_dbe_unlock(cp->rc_dbe);
2214 
2215 	return (TRUE);
2216 }
2217 
2218 rfs4_openowner_t *
2219 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2220 {
2221 	rfs4_openowner_t *oo;
2222 	rfs4_openowner_t arg;
2223 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2224 
2225 	arg.ro_owner = *openowner;
2226 	arg.ro_open_seqid = seqid;
2227 	/* CSTYLED */
2228 	oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2229 	    create, &arg, RFS4_DBS_VALID);
2230 
2231 	return (oo);
2232 }
2233 
2234 void
2235 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2236 {
2237 
2238 	rfs4_dbe_lock(oo->ro_dbe);
2239 
2240 	oo->ro_open_seqid++;
2241 
2242 	rfs4_dbe_unlock(oo->ro_dbe);
2243 }
2244 
2245 void
2246 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2247 {
2248 
2249 	rfs4_dbe_lock(oo->ro_dbe);
2250 
2251 	rfs4_free_reply(&oo->ro_reply);
2252 
2253 	rfs4_copy_reply(&oo->ro_reply, resp);
2254 
2255 	/* Save the filehandle if provided and free if not used */
2256 	if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2257 	    fh && fh->nfs_fh4_len) {
2258 		if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2259 			oo->ro_reply_fh.nfs_fh4_val =
2260 			    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2261 		nfs_fh4_copy(fh, &oo->ro_reply_fh);
2262 	} else {
2263 		if (oo->ro_reply_fh.nfs_fh4_val) {
2264 			kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2265 			    oo->ro_reply_fh.nfs_fh4_len);
2266 			oo->ro_reply_fh.nfs_fh4_val = NULL;
2267 			oo->ro_reply_fh.nfs_fh4_len = 0;
2268 		}
2269 	}
2270 
2271 	rfs4_dbe_unlock(oo->ro_dbe);
2272 }
2273 
2274 static bool_t
2275 lockowner_compare(rfs4_entry_t u_entry, void *key)
2276 {
2277 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2278 	lock_owner4 *b = (lock_owner4 *)key;
2279 
2280 	if (lo->rl_owner.clientid != b->clientid)
2281 		return (FALSE);
2282 
2283 	if (lo->rl_owner.owner_len != b->owner_len)
2284 		return (FALSE);
2285 
2286 	return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2287 	    lo->rl_owner.owner_len) == 0);
2288 }
2289 
2290 void *
2291 lockowner_mkkey(rfs4_entry_t u_entry)
2292 {
2293 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2294 
2295 	return (&lo->rl_owner);
2296 }
2297 
2298 static uint32_t
2299 lockowner_hash(void *key)
2300 {
2301 	int i;
2302 	lock_owner4 *lockowner = key;
2303 	uint_t hash = 0;
2304 
2305 	for (i = 0; i < lockowner->owner_len; i++) {
2306 		hash <<= 4;
2307 		hash += (uint_t)lockowner->owner_val[i];
2308 	}
2309 	hash += (uint_t)lockowner->clientid;
2310 	hash |= (lockowner->clientid >> 32);
2311 
2312 	return (hash);
2313 }
2314 
2315 static uint32_t
2316 pid_hash(void *key)
2317 {
2318 	return ((uint32_t)(uintptr_t)key);
2319 }
2320 
2321 static void *
2322 pid_mkkey(rfs4_entry_t u_entry)
2323 {
2324 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2325 
2326 	return ((void *)(uintptr_t)lo->rl_pid);
2327 }
2328 
2329 static bool_t
2330 pid_compare(rfs4_entry_t u_entry, void *key)
2331 {
2332 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2333 
2334 	return (lo->rl_pid == (pid_t)(uintptr_t)key);
2335 }
2336 
2337 static void
2338 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2339 {
2340 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2341 
2342 	/* Free the lock owner id */
2343 	kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2344 	rfs4_client_rele(lo->rl_client);
2345 }
2346 
2347 void
2348 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2349 {
2350 	rfs4_dbe_rele(lo->rl_dbe);
2351 }
2352 
2353 /* ARGSUSED */
2354 static bool_t
2355 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2356 {
2357 	/*
2358 	 * Since expiry is called with no other references on
2359 	 * this struct, go ahead and have it removed.
2360 	 */
2361 	return (TRUE);
2362 }
2363 
2364 static bool_t
2365 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2366 {
2367 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2368 	lock_owner4 *lockowner = (lock_owner4 *)arg;
2369 	rfs4_client_t *cp;
2370 	bool_t create = FALSE;
2371 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2372 
2373 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2374 
2375 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2376 	    &lockowner->clientid,
2377 	    &create, NULL, RFS4_DBS_VALID);
2378 
2379 	rw_exit(&nsrv4->rfs4_findclient_lock);
2380 
2381 	if (cp == NULL)
2382 		return (FALSE);
2383 
2384 	/* Reference client */
2385 	lo->rl_client = cp;
2386 	lo->rl_owner.clientid = lockowner->clientid;
2387 	lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2388 	bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2389 	    lockowner->owner_len);
2390 	lo->rl_owner.owner_len = lockowner->owner_len;
2391 	lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2392 
2393 	return (TRUE);
2394 }
2395 
2396 rfs4_lockowner_t *
2397 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2398 {
2399 	rfs4_lockowner_t *lo;
2400 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2401 
2402 	/* CSTYLED */
2403 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2404 	    create, lockowner, RFS4_DBS_VALID);
2405 
2406 	return (lo);
2407 }
2408 
2409 rfs4_lockowner_t *
2410 rfs4_findlockowner_by_pid(pid_t pid)
2411 {
2412 	rfs4_lockowner_t *lo;
2413 	bool_t create = FALSE;
2414 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2415 
2416 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2417 	    (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2418 
2419 	return (lo);
2420 }
2421 
2422 
2423 static uint32_t
2424 file_hash(void *key)
2425 {
2426 	return (ADDRHASH(key));
2427 }
2428 
2429 static void *
2430 file_mkkey(rfs4_entry_t u_entry)
2431 {
2432 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2433 
2434 	return (fp->rf_vp);
2435 }
2436 
2437 static bool_t
2438 file_compare(rfs4_entry_t u_entry, void *key)
2439 {
2440 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2441 
2442 	return (fp->rf_vp == (vnode_t *)key);
2443 }
2444 
2445 static void
2446 rfs4_file_destroy(rfs4_entry_t u_entry)
2447 {
2448 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2449 
2450 	list_destroy(&fp->rf_delegstatelist);
2451 
2452 	if (fp->rf_filehandle.nfs_fh4_val)
2453 		kmem_free(fp->rf_filehandle.nfs_fh4_val,
2454 		    fp->rf_filehandle.nfs_fh4_len);
2455 	cv_destroy(fp->rf_dinfo.rd_recall_cv);
2456 	if (fp->rf_vp) {
2457 		vnode_t *vp = fp->rf_vp;
2458 
2459 		mutex_enter(&vp->v_vsd_lock);
2460 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
2461 		mutex_exit(&vp->v_vsd_lock);
2462 		VN_RELE(vp);
2463 		fp->rf_vp = NULL;
2464 	}
2465 	rw_destroy(&fp->rf_file_rwlock);
2466 }
2467 
2468 /*
2469  * Used to unlock the underlying dbe struct only
2470  */
2471 void
2472 rfs4_file_rele(rfs4_file_t *fp)
2473 {
2474 	rfs4_dbe_rele(fp->rf_dbe);
2475 }
2476 
2477 typedef struct {
2478     vnode_t *vp;
2479     nfs_fh4 *fh;
2480 } rfs4_fcreate_arg;
2481 
2482 static bool_t
2483 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2484 {
2485 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2486 	rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2487 	vnode_t *vp = ap->vp;
2488 	nfs_fh4 *fh = ap->fh;
2489 
2490 	VN_HOLD(vp);
2491 
2492 	fp->rf_filehandle.nfs_fh4_len = 0;
2493 	fp->rf_filehandle.nfs_fh4_val = NULL;
2494 	ASSERT(fh && fh->nfs_fh4_len);
2495 	if (fh && fh->nfs_fh4_len) {
2496 		fp->rf_filehandle.nfs_fh4_val =
2497 		    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2498 		nfs_fh4_copy(fh, &fp->rf_filehandle);
2499 	}
2500 	fp->rf_vp = vp;
2501 
2502 	list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2503 	    offsetof(rfs4_deleg_state_t, rds_node));
2504 
2505 	fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2506 	fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2507 
2508 	mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2509 	cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2510 
2511 	fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2512 
2513 	rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2514 
2515 	mutex_enter(&vp->v_vsd_lock);
2516 	VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2517 	mutex_exit(&vp->v_vsd_lock);
2518 
2519 	return (TRUE);
2520 }
2521 
2522 rfs4_file_t *
2523 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2524 {
2525 	rfs4_file_t *fp;
2526 	rfs4_fcreate_arg arg;
2527 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2528 
2529 	arg.vp = vp;
2530 	arg.fh = fh;
2531 
2532 	if (*create == TRUE)
2533 		/* CSTYLED */
2534 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2535 		    &arg, RFS4_DBS_VALID);
2536 	else {
2537 		mutex_enter(&vp->v_vsd_lock);
2538 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2539 		if (fp) {
2540 			rfs4_dbe_lock(fp->rf_dbe);
2541 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2542 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2543 				rfs4_dbe_unlock(fp->rf_dbe);
2544 				fp = NULL;
2545 			} else {
2546 				rfs4_dbe_hold(fp->rf_dbe);
2547 				rfs4_dbe_unlock(fp->rf_dbe);
2548 			}
2549 		}
2550 		mutex_exit(&vp->v_vsd_lock);
2551 	}
2552 	return (fp);
2553 }
2554 
2555 /*
2556  * Find a file in the db and once it is located, take the rw lock.
2557  * Need to check the vnode pointer and if it does not exist (it was
2558  * removed between the db location and check) redo the find.  This
2559  * assumes that a file struct that has a NULL vnode pointer is marked
2560  * at 'invalid' and will not be found in the db the second time
2561  * around.
2562  */
2563 rfs4_file_t *
2564 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2565 {
2566 	rfs4_file_t *fp;
2567 	rfs4_fcreate_arg arg;
2568 	bool_t screate = *create;
2569 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2570 
2571 	if (screate == FALSE) {
2572 		mutex_enter(&vp->v_vsd_lock);
2573 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2574 		if (fp) {
2575 			rfs4_dbe_lock(fp->rf_dbe);
2576 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2577 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2578 				rfs4_dbe_unlock(fp->rf_dbe);
2579 				mutex_exit(&vp->v_vsd_lock);
2580 				fp = NULL;
2581 			} else {
2582 				rfs4_dbe_hold(fp->rf_dbe);
2583 				rfs4_dbe_unlock(fp->rf_dbe);
2584 				mutex_exit(&vp->v_vsd_lock);
2585 				rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2586 				if (fp->rf_vp == NULL) {
2587 					rw_exit(&fp->rf_file_rwlock);
2588 					rfs4_file_rele(fp);
2589 					fp = NULL;
2590 				}
2591 			}
2592 		} else {
2593 			mutex_exit(&vp->v_vsd_lock);
2594 		}
2595 	} else {
2596 retry:
2597 		arg.vp = vp;
2598 		arg.fh = fh;
2599 
2600 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2601 		    create, &arg, RFS4_DBS_VALID);
2602 		if (fp != NULL) {
2603 			rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2604 			if (fp->rf_vp == NULL) {
2605 				rw_exit(&fp->rf_file_rwlock);
2606 				rfs4_file_rele(fp);
2607 				*create = screate;
2608 				goto retry;
2609 			}
2610 		}
2611 	}
2612 
2613 	return (fp);
2614 }
2615 
2616 static uint32_t
2617 lo_state_hash(void *key)
2618 {
2619 	stateid_t *id = key;
2620 
2621 	return (id->bits.ident+id->bits.pid);
2622 }
2623 
2624 static bool_t
2625 lo_state_compare(rfs4_entry_t u_entry, void *key)
2626 {
2627 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2628 	stateid_t *id = key;
2629 	bool_t rc;
2630 
2631 	rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2632 	    lsp->rls_lockid.bits.type == id->bits.type &&
2633 	    lsp->rls_lockid.bits.ident == id->bits.ident &&
2634 	    lsp->rls_lockid.bits.pid == id->bits.pid);
2635 
2636 	return (rc);
2637 }
2638 
2639 static void *
2640 lo_state_mkkey(rfs4_entry_t u_entry)
2641 {
2642 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2643 
2644 	return (&lsp->rls_lockid);
2645 }
2646 
2647 static bool_t
2648 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2649 {
2650 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2651 
2652 	if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2653 		return (TRUE);
2654 	if (lsp->rls_state->rs_closed)
2655 		return (TRUE);
2656 	return ((gethrestime_sec() -
2657 	    lsp->rls_state->rs_owner->ro_client->rc_last_access
2658 	    > rfs4_lease_time));
2659 }
2660 
2661 static void
2662 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2663 {
2664 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2665 
2666 	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2667 	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2668 	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2669 
2670 	rfs4_sw_destroy(&lsp->rls_sw);
2671 
2672 	/* Make sure to release the file locks */
2673 	if (lsp->rls_locks_cleaned == FALSE) {
2674 		lsp->rls_locks_cleaned = TRUE;
2675 		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2676 			/* Is the PxFS kernel module loaded? */
2677 			if (lm_remove_file_locks != NULL) {
2678 				int new_sysid;
2679 
2680 				/* Encode the cluster nodeid in new sysid */
2681 				new_sysid =
2682 				    lsp->rls_locker->rl_client->rc_sysidt;
2683 				lm_set_nlmid_flk(&new_sysid);
2684 
2685 				/*
2686 				 * This PxFS routine removes file locks for a
2687 				 * client over all nodes of a cluster.
2688 				 */
2689 				DTRACE_PROBE1(nfss_i_clust_rm_lck,
2690 				    int, new_sysid);
2691 				(*lm_remove_file_locks)(new_sysid);
2692 			} else {
2693 				(void) cleanlocks(
2694 				    lsp->rls_state->rs_finfo->rf_vp,
2695 				    lsp->rls_locker->rl_pid,
2696 				    lsp->rls_locker->rl_client->rc_sysidt);
2697 			}
2698 		}
2699 	}
2700 
2701 	/* Free the last reply for this state */
2702 	rfs4_free_reply(&lsp->rls_reply);
2703 
2704 	rfs4_lockowner_rele(lsp->rls_locker);
2705 	lsp->rls_locker = NULL;
2706 
2707 	rfs4_state_rele_nounlock(lsp->rls_state);
2708 	lsp->rls_state = NULL;
2709 }
2710 
2711 static bool_t
2712 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2713 {
2714 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2715 	rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2716 	rfs4_lockowner_t *lo = argp->rls_locker;
2717 	rfs4_state_t *sp = argp->rls_state;
2718 
2719 	lsp->rls_state = sp;
2720 
2721 	lsp->rls_lockid = sp->rs_stateid;
2722 	lsp->rls_lockid.bits.type = LOCKID;
2723 	lsp->rls_lockid.bits.chgseq = 0;
2724 	lsp->rls_lockid.bits.pid = lo->rl_pid;
2725 
2726 	lsp->rls_locks_cleaned = FALSE;
2727 	lsp->rls_lock_completed = FALSE;
2728 
2729 	rfs4_sw_init(&lsp->rls_sw);
2730 
2731 	/* Attached the supplied lock owner */
2732 	rfs4_dbe_hold(lo->rl_dbe);
2733 	lsp->rls_locker = lo;
2734 
2735 	rfs4_dbe_lock(sp->rs_dbe);
2736 	list_insert_tail(&sp->rs_lostatelist, lsp);
2737 	rfs4_dbe_hold(sp->rs_dbe);
2738 	rfs4_dbe_unlock(sp->rs_dbe);
2739 
2740 	return (TRUE);
2741 }
2742 
2743 void
2744 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2745 {
2746 	if (unlock_fp == TRUE)
2747 		rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2748 	rfs4_dbe_rele(lsp->rls_dbe);
2749 }
2750 
2751 static rfs4_lo_state_t *
2752 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2753 {
2754 	rfs4_lo_state_t *lsp;
2755 	bool_t create = FALSE;
2756 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2757 
2758 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2759 	    &create, NULL, RFS4_DBS_VALID);
2760 	if (lock_fp == TRUE && lsp != NULL)
2761 		rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2762 
2763 	return (lsp);
2764 }
2765 
2766 
2767 static uint32_t
2768 lo_state_lo_hash(void *key)
2769 {
2770 	rfs4_lo_state_t *lsp = key;
2771 
2772 	return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2773 }
2774 
2775 static bool_t
2776 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2777 {
2778 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2779 	rfs4_lo_state_t *keyp = key;
2780 
2781 	return (keyp->rls_locker == lsp->rls_locker &&
2782 	    keyp->rls_state == lsp->rls_state);
2783 }
2784 
2785 static void *
2786 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2787 {
2788 	return (u_entry);
2789 }
2790 
2791 rfs4_lo_state_t *
2792 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2793     bool_t *create)
2794 {
2795 	rfs4_lo_state_t *lsp;
2796 	rfs4_lo_state_t arg;
2797 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2798 
2799 	arg.rls_locker = lo;
2800 	arg.rls_state = sp;
2801 
2802 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2803 	    &arg, create, &arg, RFS4_DBS_VALID);
2804 
2805 	return (lsp);
2806 }
2807 
2808 static stateid_t
2809 get_stateid(id_t eid)
2810 {
2811 	stateid_t id;
2812 	nfs4_srv_t *nsrv4;
2813 
2814 	nsrv4 = nfs4_get_srv();
2815 
2816 	id.bits.boottime = nsrv4->rfs4_start_time;
2817 	id.bits.ident = eid;
2818 	id.bits.chgseq = 0;
2819 	id.bits.type = 0;
2820 	id.bits.pid = 0;
2821 
2822 	/*
2823 	 * If we are booted as a cluster node, embed our nodeid.
2824 	 * We've already done sanity checks in rfs4_client_create() so no
2825 	 * need to repeat them here.
2826 	 */
2827 	id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2828 	    clconf_get_nodeid() : 0;
2829 
2830 	return (id);
2831 }
2832 
2833 /*
2834  * For use only when booted as a cluster node.
2835  * Returns TRUE if the embedded nodeid indicates that this stateid was
2836  * generated on another node.
2837  */
2838 static int
2839 foreign_stateid(stateid_t *id)
2840 {
2841 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2842 	return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2843 }
2844 
2845 /*
2846  * For use only when booted as a cluster node.
2847  * Returns TRUE if the embedded nodeid indicates that this clientid was
2848  * generated on another node.
2849  */
2850 static int
2851 foreign_clientid(cid *cidp)
2852 {
2853 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2854 	return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2855 	    (uint32_t)clconf_get_nodeid());
2856 }
2857 
2858 /*
2859  * For use only when booted as a cluster node.
2860  * Embed our cluster nodeid into the clientid.
2861  */
2862 static void
2863 embed_nodeid(cid *cidp)
2864 {
2865 	int clnodeid;
2866 	/*
2867 	 * Currently, our state tables are small enough that their
2868 	 * ids will leave enough bits free for the nodeid. If the
2869 	 * tables become larger, we mustn't overwrite the id.
2870 	 * Equally, we only have room for so many bits of nodeid, so
2871 	 * must check that too.
2872 	 */
2873 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2874 	ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2875 	clnodeid = clconf_get_nodeid();
2876 	ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2877 	ASSERT(clnodeid != NODEID_UNKNOWN);
2878 	cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2879 }
2880 
2881 static uint32_t
2882 state_hash(void *key)
2883 {
2884 	stateid_t *ip = (stateid_t *)key;
2885 
2886 	return (ip->bits.ident);
2887 }
2888 
2889 static bool_t
2890 state_compare(rfs4_entry_t u_entry, void *key)
2891 {
2892 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2893 	stateid_t *id = (stateid_t *)key;
2894 	bool_t rc;
2895 
2896 	rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2897 	    sp->rs_stateid.bits.ident == id->bits.ident);
2898 
2899 	return (rc);
2900 }
2901 
2902 static void *
2903 state_mkkey(rfs4_entry_t u_entry)
2904 {
2905 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2906 
2907 	return (&sp->rs_stateid);
2908 }
2909 
2910 static void
2911 rfs4_state_destroy(rfs4_entry_t u_entry)
2912 {
2913 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2914 
2915 	/* remove from openowner list */
2916 	rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2917 	list_remove(&sp->rs_owner->ro_statelist, sp);
2918 	rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2919 
2920 	list_destroy(&sp->rs_lostatelist);
2921 
2922 	/* release any share locks for this stateid if it's still open */
2923 	if (!sp->rs_closed) {
2924 		rfs4_dbe_lock(sp->rs_dbe);
2925 		(void) rfs4_unshare(sp);
2926 		rfs4_dbe_unlock(sp->rs_dbe);
2927 	}
2928 
2929 	/* Were done with the file */
2930 	rfs4_file_rele(sp->rs_finfo);
2931 	sp->rs_finfo = NULL;
2932 
2933 	/* And now with the openowner */
2934 	rfs4_openowner_rele(sp->rs_owner);
2935 	sp->rs_owner = NULL;
2936 }
2937 
2938 static void
2939 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2940 {
2941 	rfs4_dbe_rele(sp->rs_dbe);
2942 }
2943 
2944 void
2945 rfs4_state_rele(rfs4_state_t *sp)
2946 {
2947 	rw_exit(&sp->rs_finfo->rf_file_rwlock);
2948 	rfs4_dbe_rele(sp->rs_dbe);
2949 }
2950 
2951 static uint32_t
2952 deleg_hash(void *key)
2953 {
2954 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2955 
2956 	return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2957 }
2958 
2959 static bool_t
2960 deleg_compare(rfs4_entry_t u_entry, void *key)
2961 {
2962 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2963 	rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2964 
2965 	return (dsp->rds_client == kdsp->rds_client &&
2966 	    dsp->rds_finfo == kdsp->rds_finfo);
2967 }
2968 
2969 static void *
2970 deleg_mkkey(rfs4_entry_t u_entry)
2971 {
2972 	return (u_entry);
2973 }
2974 
2975 static uint32_t
2976 deleg_state_hash(void *key)
2977 {
2978 	stateid_t *ip = (stateid_t *)key;
2979 
2980 	return (ip->bits.ident);
2981 }
2982 
2983 static bool_t
2984 deleg_state_compare(rfs4_entry_t u_entry, void *key)
2985 {
2986 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2987 	stateid_t *id = (stateid_t *)key;
2988 	bool_t rc;
2989 
2990 	if (id->bits.type != DELEGID)
2991 		return (FALSE);
2992 
2993 	rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
2994 	    dsp->rds_delegid.bits.ident == id->bits.ident);
2995 
2996 	return (rc);
2997 }
2998 
2999 static void *
3000 deleg_state_mkkey(rfs4_entry_t u_entry)
3001 {
3002 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3003 
3004 	return (&dsp->rds_delegid);
3005 }
3006 
3007 static bool_t
3008 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3009 {
3010 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3011 
3012 	if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3013 		return (TRUE);
3014 
3015 	if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3016 		return (TRUE);
3017 
3018 	if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3019 	    > rfs4_lease_time)) {
3020 		rfs4_dbe_invalidate(dsp->rds_dbe);
3021 		return (TRUE);
3022 	}
3023 
3024 	return (FALSE);
3025 }
3026 
3027 static bool_t
3028 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3029 {
3030 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3031 	rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3032 	rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3033 
3034 	rfs4_dbe_hold(fp->rf_dbe);
3035 	rfs4_dbe_hold(cp->rc_dbe);
3036 
3037 	dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3038 	dsp->rds_delegid.bits.type = DELEGID;
3039 	dsp->rds_finfo = fp;
3040 	dsp->rds_client = cp;
3041 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
3042 
3043 	dsp->rds_time_granted = gethrestime_sec();	/* observability */
3044 	dsp->rds_time_revoked = 0;
3045 
3046 	list_link_init(&dsp->rds_node);
3047 
3048 	return (TRUE);
3049 }
3050 
3051 static void
3052 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3053 {
3054 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3055 
3056 	/* return delegation if necessary */
3057 	rfs4_return_deleg(dsp, FALSE);
3058 
3059 	/* Were done with the file */
3060 	rfs4_file_rele(dsp->rds_finfo);
3061 	dsp->rds_finfo = NULL;
3062 
3063 	/* And now with the openowner */
3064 	rfs4_client_rele(dsp->rds_client);
3065 	dsp->rds_client = NULL;
3066 }
3067 
3068 rfs4_deleg_state_t *
3069 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3070 {
3071 	rfs4_deleg_state_t ds, *dsp;
3072 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3073 
3074 	ds.rds_client = sp->rs_owner->ro_client;
3075 	ds.rds_finfo = sp->rs_finfo;
3076 
3077 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3078 	    create, &ds, RFS4_DBS_VALID);
3079 
3080 	return (dsp);
3081 }
3082 
3083 rfs4_deleg_state_t *
3084 rfs4_finddelegstate(stateid_t *id)
3085 {
3086 	rfs4_deleg_state_t *dsp;
3087 	bool_t create = FALSE;
3088 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3089 
3090 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3091 	    id, &create, NULL, RFS4_DBS_VALID);
3092 
3093 	return (dsp);
3094 }
3095 
3096 void
3097 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3098 {
3099 	rfs4_dbe_rele(dsp->rds_dbe);
3100 }
3101 
3102 void
3103 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3104 {
3105 
3106 	rfs4_dbe_lock(lsp->rls_dbe);
3107 
3108 	/*
3109 	 * If we are skipping sequence id checking, this means that
3110 	 * this is the first lock request and therefore the sequence
3111 	 * id does not need to be updated.  This only happens on the
3112 	 * first lock request for a lockowner
3113 	 */
3114 	if (!lsp->rls_skip_seqid_check)
3115 		lsp->rls_seqid++;
3116 
3117 	rfs4_dbe_unlock(lsp->rls_dbe);
3118 }
3119 
3120 void
3121 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3122 {
3123 
3124 	rfs4_dbe_lock(lsp->rls_dbe);
3125 
3126 	rfs4_free_reply(&lsp->rls_reply);
3127 
3128 	rfs4_copy_reply(&lsp->rls_reply, resp);
3129 
3130 	rfs4_dbe_unlock(lsp->rls_dbe);
3131 }
3132 
3133 void
3134 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3135     bool_t close_of_client)
3136 {
3137 	rfs4_state_t *sp;
3138 
3139 	rfs4_dbe_lock(oo->ro_dbe);
3140 
3141 	for (sp = list_head(&oo->ro_statelist); sp != NULL;
3142 	    sp = list_next(&oo->ro_statelist, sp)) {
3143 		rfs4_state_close(sp, FALSE, close_of_client, CRED());
3144 		if (invalidate == TRUE)
3145 			rfs4_dbe_invalidate(sp->rs_dbe);
3146 	}
3147 
3148 	rfs4_dbe_invalidate(oo->ro_dbe);
3149 	rfs4_dbe_unlock(oo->ro_dbe);
3150 }
3151 
3152 static uint32_t
3153 state_owner_file_hash(void *key)
3154 {
3155 	rfs4_state_t *sp = key;
3156 
3157 	return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3158 }
3159 
3160 static bool_t
3161 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3162 {
3163 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3164 	rfs4_state_t *arg = key;
3165 
3166 	if (sp->rs_closed == TRUE)
3167 		return (FALSE);
3168 
3169 	return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3170 }
3171 
3172 static void *
3173 state_owner_file_mkkey(rfs4_entry_t u_entry)
3174 {
3175 	return (u_entry);
3176 }
3177 
3178 static uint32_t
3179 state_file_hash(void *key)
3180 {
3181 	return (ADDRHASH(key));
3182 }
3183 
3184 static bool_t
3185 state_file_compare(rfs4_entry_t u_entry, void *key)
3186 {
3187 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3188 	rfs4_file_t *fp = key;
3189 
3190 	if (sp->rs_closed == TRUE)
3191 		return (FALSE);
3192 
3193 	return (fp == sp->rs_finfo);
3194 }
3195 
3196 static void *
3197 state_file_mkkey(rfs4_entry_t u_entry)
3198 {
3199 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3200 
3201 	return (sp->rs_finfo);
3202 }
3203 
3204 rfs4_state_t *
3205 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3206     bool_t *create)
3207 {
3208 	rfs4_state_t *sp;
3209 	rfs4_state_t key;
3210 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3211 
3212 	key.rs_owner = oo;
3213 	key.rs_finfo = fp;
3214 
3215 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3216 	    &key, create, &key, RFS4_DBS_VALID);
3217 
3218 	return (sp);
3219 }
3220 
3221 /* This returns ANY state struct that refers to this file */
3222 static rfs4_state_t *
3223 rfs4_findstate_by_file(rfs4_file_t *fp)
3224 {
3225 	bool_t create = FALSE;
3226 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3227 
3228 	return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3229 	    &create, fp, RFS4_DBS_VALID));
3230 }
3231 
3232 static bool_t
3233 rfs4_state_expiry(rfs4_entry_t u_entry)
3234 {
3235 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3236 
3237 	if (rfs4_dbe_is_invalid(sp->rs_dbe))
3238 		return (TRUE);
3239 
3240 	if (sp->rs_closed == TRUE &&
3241 	    ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3242 	    > rfs4_lease_time))
3243 		return (TRUE);
3244 
3245 	return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3246 	    > rfs4_lease_time));
3247 }
3248 
3249 static bool_t
3250 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3251 {
3252 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3253 	rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3254 	rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3255 
3256 	rfs4_dbe_hold(fp->rf_dbe);
3257 	rfs4_dbe_hold(oo->ro_dbe);
3258 	sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3259 	sp->rs_stateid.bits.type = OPENID;
3260 	sp->rs_owner = oo;
3261 	sp->rs_finfo = fp;
3262 
3263 	list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3264 	    offsetof(rfs4_lo_state_t, rls_node));
3265 
3266 	/* Insert state on per open owner's list */
3267 	rfs4_dbe_lock(oo->ro_dbe);
3268 	list_insert_tail(&oo->ro_statelist, sp);
3269 	rfs4_dbe_unlock(oo->ro_dbe);
3270 
3271 	return (TRUE);
3272 }
3273 
3274 static rfs4_state_t *
3275 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3276 {
3277 	rfs4_state_t *sp;
3278 	bool_t create = FALSE;
3279 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3280 
3281 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3282 	    &create, NULL, find_invalid);
3283 	if (lock_fp == TRUE && sp != NULL)
3284 		rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3285 
3286 	return (sp);
3287 }
3288 
3289 void
3290 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3291     cred_t *cr)
3292 {
3293 	/* Remove the associated lo_state owners */
3294 	if (!lock_held)
3295 		rfs4_dbe_lock(sp->rs_dbe);
3296 
3297 	/*
3298 	 * If refcnt == 0, the dbe is about to be destroyed.
3299 	 * lock state will be released by the reaper thread.
3300 	 */
3301 
3302 	if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3303 		if (sp->rs_closed == FALSE) {
3304 			rfs4_release_share_lock_state(sp, cr, close_of_client);
3305 			sp->rs_closed = TRUE;
3306 		}
3307 	}
3308 
3309 	if (!lock_held)
3310 		rfs4_dbe_unlock(sp->rs_dbe);
3311 }
3312 
3313 /*
3314  * Remove all state associated with the given client.
3315  */
3316 void
3317 rfs4_client_state_remove(rfs4_client_t *cp)
3318 {
3319 	rfs4_openowner_t *oo;
3320 
3321 	rfs4_dbe_lock(cp->rc_dbe);
3322 
3323 	for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3324 	    oo = list_next(&cp->rc_openownerlist, oo)) {
3325 		rfs4_free_opens(oo, TRUE, TRUE);
3326 	}
3327 
3328 	rfs4_dbe_unlock(cp->rc_dbe);
3329 }
3330 
/*
 * Tear down a client: invalidate its database entry (under the entry
 * lock), remove the state hanging off it, and finally drop a reference
 * on the client with rfs4_client_rele().
 */
void
rfs4_client_close(rfs4_client_t *cp)
{
	/* Mark client as going away. */
	rfs4_dbe_lock(cp->rc_dbe);
	rfs4_dbe_invalidate(cp->rc_dbe);
	rfs4_dbe_unlock(cp->rc_dbe);

	/*
	 * The dbe lock has to be dropped before this call:
	 * rfs4_client_state_remove() acquires cp->rc_dbe itself.
	 */
	rfs4_client_state_remove(cp);

	/* Release the client */
	rfs4_client_rele(cp);
}
3344 
3345 nfsstat4
3346 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3347 {
3348 	cid *cidp = (cid *) cp;
3349 	nfs4_srv_t *nsrv4;
3350 
3351 	nsrv4 = nfs4_get_srv();
3352 
3353 	/*
3354 	 * If we are booted as a cluster node, check the embedded nodeid.
3355 	 * If it indicates that this clientid was generated on another node,
3356 	 * inform the client accordingly.
3357 	 */
3358 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3359 		return (NFS4ERR_STALE_CLIENTID);
3360 
3361 	/*
3362 	 * If the server start time matches the time provided
3363 	 * by the client (via the clientid) and this is NOT a
3364 	 * setclientid_confirm then return EXPIRED.
3365 	 */
3366 	if (!setclid_confirm &&
3367 	    cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3368 		return (NFS4ERR_EXPIRED);
3369 
3370 	return (NFS4ERR_STALE_CLIENTID);
3371 }
3372 
3373 /*
3374  * This is used when a stateid has not been found amongst the
3375  * current server's state.  Check the stateid to see if it
3376  * was from this server instantiation or not.
3377  */
3378 static nfsstat4
3379 what_stateid_error(stateid_t *id, stateid_type_t type)
3380 {
3381 	nfs4_srv_t *nsrv4;
3382 
3383 	nsrv4 = nfs4_get_srv();
3384 
3385 	/* If we are booted as a cluster node, was stateid locally generated? */
3386 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3387 		return (NFS4ERR_STALE_STATEID);
3388 
3389 	/* If types don't match then no use checking further */
3390 	if (type != id->bits.type)
3391 		return (NFS4ERR_BAD_STATEID);
3392 
3393 	/* From a different server instantiation, return STALE */
3394 	if (id->bits.boottime != nsrv4->rfs4_start_time)
3395 		return (NFS4ERR_STALE_STATEID);
3396 
3397 	/*
3398 	 * From this server but the state is most likely beyond lease
3399 	 * timeout: return NFS4ERR_EXPIRED.  However, there is the
3400 	 * case of a delegation stateid.  For delegations, there is a
3401 	 * case where the state can be removed without the client's
3402 	 * knowledge/consent: revocation.  In the case of delegation
3403 	 * revocation, the delegation state will be removed and will
3404 	 * not be found.  If the client does something like a
3405 	 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3406 	 * that has been revoked, the server should return BAD_STATEID
3407 	 * instead of the more common EXPIRED error.
3408 	 */
3409 	if (id->bits.boottime == nsrv4->rfs4_start_time) {
3410 		if (type == DELEGID)
3411 			return (NFS4ERR_BAD_STATEID);
3412 		else
3413 			return (NFS4ERR_EXPIRED);
3414 	}
3415 
3416 	return (NFS4ERR_BAD_STATEID);
3417 }
3418 
3419 /*
3420  * Used later on to find the various state structs.  When called from
3421  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3422  * taken (it is not needed) and helps on the read/write path with
3423  * respect to performance.
3424  */
3425 static nfsstat4
3426 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3427     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3428 {
3429 	stateid_t *id = (stateid_t *)stateid;
3430 	rfs4_state_t *sp;
3431 
3432 	*spp = NULL;
3433 
3434 	/* If we are booted as a cluster node, was stateid locally generated? */
3435 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3436 		return (NFS4ERR_STALE_STATEID);
3437 
3438 	sp = rfs4_findstate(id, find_invalid, lock_fp);
3439 	if (sp == NULL) {
3440 		return (what_stateid_error(id, OPENID));
3441 	}
3442 
3443 	if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3444 		if (lock_fp == TRUE)
3445 			rfs4_state_rele(sp);
3446 		else
3447 			rfs4_state_rele_nounlock(sp);
3448 		return (NFS4ERR_EXPIRED);
3449 	}
3450 
3451 	*spp = sp;
3452 
3453 	return (NFS4_OK);
3454 }
3455 
3456 nfsstat4
3457 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3458     rfs4_dbsearch_type_t find_invalid)
3459 {
3460 	return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3461 }
3462 
3463 int
3464 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3465 {
3466 	stateid_t *id = (stateid_t *)stateid;
3467 
3468 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
3469 		return (NFS4_CHECK_STATEID_EXPIRED);
3470 
3471 	/* Stateid is some time in the future - that's bad */
3472 	if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3473 		return (NFS4_CHECK_STATEID_BAD);
3474 
3475 	if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3476 		return (NFS4_CHECK_STATEID_REPLAY);
3477 
3478 	/* Stateid is some time in the past - that's old */
3479 	if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3480 		return (NFS4_CHECK_STATEID_OLD);
3481 
3482 	/* Caller needs to know about confirmation before closure */
3483 	if (sp->rs_owner->ro_need_confirm)
3484 		return (NFS4_CHECK_STATEID_UNCONFIRMED);
3485 
3486 	if (sp->rs_closed == TRUE)
3487 		return (NFS4_CHECK_STATEID_CLOSED);
3488 
3489 	return (NFS4_CHECK_STATEID_OKAY);
3490 }
3491 
3492 int
3493 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3494 {
3495 	stateid_t *id = (stateid_t *)stateid;
3496 
3497 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3498 		return (NFS4_CHECK_STATEID_EXPIRED);
3499 
3500 	/* Stateid is some time in the future - that's bad */
3501 	if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3502 		return (NFS4_CHECK_STATEID_BAD);
3503 
3504 	if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3505 		return (NFS4_CHECK_STATEID_REPLAY);
3506 
3507 	/* Stateid is some time in the past - that's old */
3508 	if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3509 		return (NFS4_CHECK_STATEID_OLD);
3510 
3511 	if (lsp->rls_state->rs_closed == TRUE)
3512 		return (NFS4_CHECK_STATEID_CLOSED);
3513 
3514 	return (NFS4_CHECK_STATEID_OKAY);
3515 }
3516 
3517 nfsstat4
3518 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3519 {
3520 	stateid_t *id = (stateid_t *)stateid;
3521 	rfs4_deleg_state_t *dsp;
3522 
3523 	*dspp = NULL;
3524 
3525 	/* If we are booted as a cluster node, was stateid locally generated? */
3526 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3527 		return (NFS4ERR_STALE_STATEID);
3528 
3529 	dsp = rfs4_finddelegstate(id);
3530 	if (dsp == NULL) {
3531 		return (what_stateid_error(id, DELEGID));
3532 	}
3533 
3534 	if (rfs4_lease_expired(dsp->rds_client)) {
3535 		rfs4_deleg_state_rele(dsp);
3536 		return (NFS4ERR_EXPIRED);
3537 	}
3538 
3539 	*dspp = dsp;
3540 
3541 	return (NFS4_OK);
3542 }
3543 
3544 nfsstat4
3545 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3546 {
3547 	stateid_t *id = (stateid_t *)stateid;
3548 	rfs4_lo_state_t *lsp;
3549 
3550 	*lspp = NULL;
3551 
3552 	/* If we are booted as a cluster node, was stateid locally generated? */
3553 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3554 		return (NFS4ERR_STALE_STATEID);
3555 
3556 	lsp = rfs4_findlo_state(id, lock_fp);
3557 	if (lsp == NULL) {
3558 		return (what_stateid_error(id, LOCKID));
3559 	}
3560 
3561 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3562 		rfs4_lo_state_rele(lsp, lock_fp);
3563 		return (NFS4ERR_EXPIRED);
3564 	}
3565 
3566 	*lspp = lsp;
3567 
3568 	return (NFS4_OK);
3569 }
3570 
3571 static nfsstat4
3572 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3573     rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3574 {
3575 	rfs4_state_t *sp = NULL;
3576 	rfs4_deleg_state_t *dsp = NULL;
3577 	rfs4_lo_state_t *lsp = NULL;
3578 	stateid_t *id;
3579 	nfsstat4 status;
3580 
3581 	*spp = NULL; *dspp = NULL; *lspp = NULL;
3582 
3583 	id = (stateid_t *)sid;
3584 	switch (id->bits.type) {
3585 	case OPENID:
3586 		status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3587 		break;
3588 	case DELEGID:
3589 		status = rfs4_get_deleg_state(sid, &dsp);
3590 		break;
3591 	case LOCKID:
3592 		status = rfs4_get_lo_state(sid, &lsp, FALSE);
3593 		if (status == NFS4_OK) {
3594 			sp = lsp->rls_state;
3595 			rfs4_dbe_hold(sp->rs_dbe);
3596 		}
3597 		break;
3598 	default:
3599 		status = NFS4ERR_BAD_STATEID;
3600 	}
3601 
3602 	if (status == NFS4_OK) {
3603 		*spp = sp;
3604 		*dspp = dsp;
3605 		*lspp = lsp;
3606 	}
3607 
3608 	return (status);
3609 }
3610 
3611 /*
3612  * Given the I/O mode (FREAD or FWRITE), this checks whether the
3613  * rfs4_state_t struct has access to do this operation and if so
3614  * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3615  */
3616 nfsstat4
3617 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3618 {
3619 	nfsstat4 stat = NFS4_OK;
3620 	rfs4_file_t *fp;
3621 	bool_t create = FALSE;
3622 
3623 	rfs4_dbe_lock(sp->rs_dbe);
3624 	if (mode == FWRITE) {
3625 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3626 			stat = NFS4ERR_OPENMODE;
3627 		}
3628 	} else if (mode == FREAD) {
3629 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3630 			/*
3631 			 * If we have OPENed the file with DENYing access
3632 			 * to both READ and WRITE then no one else could
3633 			 * have OPENed the file, hence no conflicting READ
3634 			 * deny.  This check is merely an optimization.
3635 			 */
3636 			if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3637 				goto out;
3638 
3639 			/* Check against file struct's DENY mode */
3640 			fp = rfs4_findfile(vp, NULL, &create);
3641 			if (fp != NULL) {
3642 				int deny_read = 0;
3643 				rfs4_dbe_lock(fp->rf_dbe);
3644 				/*
3645 				 * Check if any other open owner has the file
3646 				 * OPENed with deny READ.
3647 				 */
3648 				if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3649 					deny_read = 1;
3650 				ASSERT(fp->rf_deny_read >= deny_read);
3651 				if (fp->rf_deny_read > deny_read)
3652 					stat = NFS4ERR_OPENMODE;
3653 				rfs4_dbe_unlock(fp->rf_dbe);
3654 				rfs4_file_rele(fp);
3655 			}
3656 		}
3657 	} else {
3658 		/* Illegal I/O mode */
3659 		stat = NFS4ERR_INVAL;
3660 	}
3661 out:
3662 	rfs4_dbe_unlock(sp->rs_dbe);
3663 	return (stat);
3664 }
3665 
/*
 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
 * the file is being truncated, return NFS4_OK if allowed or appropriate
 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
 * the associated file will be done if the I/O is not consistent with any
 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
 * as reader or writer as appropriate. rfs4_op_open will acquire the
 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
 * deleg parameter, we will return whether a write delegation is held by
 * the client associated with this stateid.
 * If the server instance associated with the relevant client is in its
 * grace period, return NFS4ERR_GRACE.
 *
 * Notes on the parameters:
 *   deleg     - optional; only ever set to TRUE here, never cleared, so
 *               the caller must initialize it.
 *   do_access - when set, the open state's share access is checked
 *               against "mode" via rfs4_state_has_access().
 *   ct        - optional caller context; it is initialized here and, for
 *               a lock-owner state, filled in with the lock owner's
 *               sysid and pid.
 *
 * Every state reference obtained from rfs4_get_all_state() is released
 * again before returning.
 */

nfsstat4
rfs4_check_stateid(int mode, vnode_t *vp,
    stateid4 *stateid, bool_t trunc, bool_t *deleg,
    bool_t do_access, caller_context_t *ct)
{
	rfs4_file_t *fp;
	bool_t create = FALSE;
	rfs4_state_t *sp;
	rfs4_deleg_state_t *dsp;
	rfs4_lo_state_t *lsp;
	stateid_t *id = (stateid_t *)stateid;
	nfsstat4 stat = NFS4_OK;

	/* Start with a context that identifies the NFSv4 server only */
	if (ct != NULL) {
		ct->cc_sysid = 0;
		ct->cc_pid = 0;
		ct->cc_caller_id = nfs4_srv_caller_id;
		ct->cc_flags = CC_DONTBLOCK;
	}

	if (ISSPECIAL(stateid)) {
		/*
		 * Special stateid: there is no state to validate, only a
		 * possible conflict with a delegation on the file.
		 */
		fp = rfs4_findfile(vp, NULL, &create);
		if (fp == NULL)
			return (NFS4_OK);
		if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
			rfs4_file_rele(fp);
			return (NFS4_OK);
		}
		/*
		 * A write conflicts with any delegation, and any I/O
		 * conflicts with a write delegation: recall it and have
		 * the client retry.
		 */
		if (mode == FWRITE ||
		    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
			rfs4_recall_deleg(fp, trunc, NULL);
			rfs4_file_rele(fp);
			return (NFS4ERR_DELAY);
		}
		rfs4_file_rele(fp);
		return (NFS4_OK);
	} else {
		stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
		if (stat != NFS4_OK)
			return (stat);
		/*
		 * Stateid provided was a "lock" stateid.  In that case
		 * rfs4_get_all_state() also returned the associated open
		 * state in sp with its own hold, so each error path below
		 * has to release both lsp and sp.
		 */
		if (lsp != NULL) {
			/* Is associated server instance in its grace period? */
			if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
				rfs4_lo_state_rele(lsp, FALSE);
				if (sp != NULL)
					rfs4_state_rele_nounlock(sp);
				return (NFS4ERR_GRACE);
			}
			if (id->bits.type == LOCKID) {
				/* Seqid in the future? - that's bad */
				if (lsp->rls_lockid.bits.chgseq <
				    id->bits.chgseq) {
					rfs4_lo_state_rele(lsp, FALSE);
					if (sp != NULL)
						rfs4_state_rele_nounlock(sp);
					return (NFS4ERR_BAD_STATEID);
				}
				/* Seqid in the past? - that's old */
				if (lsp->rls_lockid.bits.chgseq >
				    id->bits.chgseq) {
					rfs4_lo_state_rele(lsp, FALSE);
					if (sp != NULL)
						rfs4_state_rele_nounlock(sp);
					return (NFS4ERR_OLD_STATEID);
				}
				/* Ensure specified filehandle matches */
				if (lsp->rls_state->rs_finfo->rf_vp != vp) {
					rfs4_lo_state_rele(lsp, FALSE);
					if (sp != NULL)
						rfs4_state_rele_nounlock(sp);
					return (NFS4ERR_BAD_STATEID);
				}
			}
			/* Identify the lock owner in the caller context */
			if (ct != NULL) {
				ct->cc_sysid =
				    lsp->rls_locker->rl_client->rc_sysidt;
				ct->cc_pid = lsp->rls_locker->rl_pid;
			}
			/* Done with lsp; the checks continue on sp below */
			rfs4_lo_state_rele(lsp, FALSE);
		}

		/* Stateid provided was an "open" stateid */
		if (sp != NULL) {
			/* Is associated server instance in its grace period? */
			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
				rfs4_state_rele_nounlock(sp);
				return (NFS4ERR_GRACE);
			}
			/*
			 * The seqid is only compared for a genuine open
			 * stateid; for a lock stateid it was already
			 * checked against the lock-owner state above.
			 */
			if (id->bits.type == OPENID) {
				/* Seqid in the future? - that's bad */
				if (sp->rs_stateid.bits.chgseq <
				    id->bits.chgseq) {
					rfs4_state_rele_nounlock(sp);
					return (NFS4ERR_BAD_STATEID);
				}
				/* Seqid in the past - that's old */
				if (sp->rs_stateid.bits.chgseq >
				    id->bits.chgseq) {
					rfs4_state_rele_nounlock(sp);
					return (NFS4ERR_OLD_STATEID);
				}
			}
			/* Ensure specified filehandle matches */
			if (sp->rs_finfo->rf_vp != vp) {
				rfs4_state_rele_nounlock(sp);
				return (NFS4ERR_BAD_STATEID);
			}

			/* An unconfirmed open owner can not do I/O yet */
			if (sp->rs_owner->ro_need_confirm) {
				rfs4_state_rele_nounlock(sp);
				return (NFS4ERR_BAD_STATEID);
			}

			/* The open has already been closed */
			if (sp->rs_closed == TRUE) {
				rfs4_state_rele_nounlock(sp);
				return (NFS4ERR_OLD_STATEID);
			}

			if (do_access)
				stat = rfs4_state_has_access(sp, mode, vp);
			else
				stat = NFS4_OK;

			/*
			 * Return whether this state has write
			 * delegation if desired
			 */
			if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE))
				*deleg = TRUE;

			/*
			 * We got a valid stateid, so we update the
			 * lease on the client. Ideally we would like
			 * to do this after the calling op succeeds,
			 * but for now this will be good
			 * enough. Callers of this routine are
			 * currently insulated from the state stuff.
			 */
			rfs4_update_lease(sp->rs_owner->ro_client);

			/*
			 * If a delegation is present on this file and
			 * this is a WRITE, then update the lastwrite
			 * time to indicate that activity is present.
			 */
			if (sp->rs_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE &&
			    mode == FWRITE) {
				sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
				    gethrestime_sec();
			}

			rfs4_state_rele_nounlock(sp);

			return (stat);
		}

		/* Stateid provided was a "delegation" stateid */
		if (dsp != NULL) {
			/* Is associated server instance in its grace period? */
			if (rfs4_clnt_in_grace(dsp->rds_client)) {
				rfs4_deleg_state_rele(dsp);
				return (NFS4ERR_GRACE);
			}
			/* A delegation stateid's seqid must match exactly */
			if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
				rfs4_deleg_state_rele(dsp);
				return (NFS4ERR_BAD_STATEID);
			}

			/* Ensure specified filehandle matches */
			if (dsp->rds_finfo->rf_vp != vp) {
				rfs4_deleg_state_rele(dsp);
				return (NFS4ERR_BAD_STATEID);
			}
			/*
			 * Return whether this state has write
			 * delegation if desired
			 */
			if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE))
				*deleg = TRUE;

			rfs4_update_lease(dsp->rds_client);

			/*
			 * If a delegation is present on this file and
			 * this is a WRITE, then update the lastwrite
			 * time to indicate that activity is present.
			 */
			if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
			    OPEN_DELEGATE_WRITE && mode == FWRITE) {
				dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
				    gethrestime_sec();
			}

			/*
			 * XXX - what happens if this is a WRITE and the
			 * delegation type is for READ.
			 */
			rfs4_deleg_state_rele(dsp);

			/* stat is still NFS4_OK from rfs4_get_all_state() */
			return (stat);
		}
		/*
		 * If we got this far, something bad happened
		 */
		return (NFS4ERR_BAD_STATEID);
	}
}
3890 
3891 
3892 /*
3893  * This is a special function in that for the file struct provided the
3894  * server wants to remove/close all current state associated with the
3895  * file.  The prime use of this would be with OP_REMOVE to force the
3896  * release of state and particularly of file locks.
3897  *
3898  * There is an assumption that there is no delegations outstanding on
3899  * this file at this point.  The caller should have waited for those
3900  * to be returned or revoked.
3901  */
3902 void
3903 rfs4_close_all_state(rfs4_file_t *fp)
3904 {
3905 	rfs4_state_t *sp;
3906 
3907 	rfs4_dbe_lock(fp->rf_dbe);
3908 
3909 #ifdef DEBUG
3910 	/* only applies when server is handing out delegations */
3911 	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
3912 		ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3913 #endif
3914 
3915 	/* No delegations for this file */
3916 	ASSERT(list_is_empty(&fp->rf_delegstatelist));
3917 
3918 	/* Make sure that it can not be found */
3919 	rfs4_dbe_invalidate(fp->rf_dbe);
3920 
3921 	if (fp->rf_vp == NULL) {
3922 		rfs4_dbe_unlock(fp->rf_dbe);
3923 		return;
3924 	}
3925 	rfs4_dbe_unlock(fp->rf_dbe);
3926 
3927 	/*
3928 	 * Hold as writer to prevent other server threads from
3929 	 * processing requests related to the file while all state is
3930 	 * being removed.
3931 	 */
3932 	rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3933 
3934 	/* Remove ALL state from the file */
3935 	while (sp = rfs4_findstate_by_file(fp)) {
3936 		rfs4_state_close(sp, FALSE, FALSE, CRED());
3937 		rfs4_state_rele_nounlock(sp);
3938 	}
3939 
3940 	/*
3941 	 * This is only safe since there are no further references to
3942 	 * the file.
3943 	 */
3944 	rfs4_dbe_lock(fp->rf_dbe);
3945 	if (fp->rf_vp) {
3946 		vnode_t *vp = fp->rf_vp;
3947 
3948 		mutex_enter(&vp->v_vsd_lock);
3949 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
3950 		mutex_exit(&vp->v_vsd_lock);
3951 		VN_RELE(vp);
3952 		fp->rf_vp = NULL;
3953 	}
3954 	rfs4_dbe_unlock(fp->rf_dbe);
3955 
3956 	/* Finally let other references to proceed */
3957 	rw_exit(&fp->rf_file_rwlock);
3958 }
3959 
3960 /*
3961  * This function is used as a target for the rfs4_dbe_walk() call
3962  * below.  The purpose of this function is to see if the
3963  * lockowner_state refers to a file that resides within the exportinfo
3964  * export.  If so, then remove the lock_owner state (file locks and
3965  * share "locks") for this object since the intent is the server is
3966  * unexporting the specified directory.  Be sure to invalidate the
3967  * object after the state has been released
3968  */
3969 static void
3970 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3971 {
3972 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3973 	struct exportinfo *exi = (struct exportinfo *)e;
3974 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
3975 	fhandle_t *efhp;
3976 
3977 	efhp = (fhandle_t *)&exi->exi_fh;
3978 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3979 
3980 	FH_TO_FMT4(efhp, exi_fhp);
3981 
3982 	finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3983 	    rf_filehandle.nfs_fh4_val;
3984 
3985 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3986 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3987 	    exi_fhp->fh4_xlen) == 0) {
3988 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
3989 		rfs4_dbe_invalidate(lsp->rls_dbe);
3990 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
3991 	}
3992 }
3993 
3994 /*
3995  * This function is used as a target for the rfs4_dbe_walk() call
3996  * below.  The purpose of this function is to see if the state refers
3997  * to a file that resides within the exportinfo export.  If so, then
3998  * remove the open state for this object since the intent is the
3999  * server is unexporting the specified directory.  The main result for
4000  * this type of entry is to invalidate it such it will not be found in
4001  * the future.
4002  */
4003 static void
4004 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4005 {
4006 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4007 	struct exportinfo *exi = (struct exportinfo *)e;
4008 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4009 	fhandle_t *efhp;
4010 
4011 	efhp = (fhandle_t *)&exi->exi_fh;
4012 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4013 
4014 	FH_TO_FMT4(efhp, exi_fhp);
4015 
4016 	finfo_fhp =
4017 	    (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4018 
4019 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4020 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4021 	    exi_fhp->fh4_xlen) == 0) {
4022 		rfs4_state_close(sp, TRUE, FALSE, CRED());
4023 		rfs4_dbe_invalidate(sp->rs_dbe);
4024 	}
4025 }
4026 
4027 /*
4028  * This function is used as a target for the rfs4_dbe_walk() call
4029  * below.  The purpose of this function is to see if the state refers
4030  * to a file that resides within the exportinfo export.  If so, then
4031  * remove the deleg state for this object since the intent is the
4032  * server is unexporting the specified directory.  The main result for
4033  * this type of entry is to invalidate it such it will not be found in
4034  * the future.
4035  */
4036 static void
4037 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4038 {
4039 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4040 	struct exportinfo *exi = (struct exportinfo *)e;
4041 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4042 	fhandle_t *efhp;
4043 
4044 	efhp = (fhandle_t *)&exi->exi_fh;
4045 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4046 
4047 	FH_TO_FMT4(efhp, exi_fhp);
4048 
4049 	finfo_fhp =
4050 	    (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4051 
4052 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4053 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4054 	    exi_fhp->fh4_xlen) == 0) {
4055 		rfs4_dbe_invalidate(dsp->rds_dbe);
4056 	}
4057 }
4058 
4059 /*
4060  * This function is used as a target for the rfs4_dbe_walk() call
4061  * below.  The purpose of this function is to see if the state refers
4062  * to a file that resides within the exportinfo export.  If so, then
4063  * release vnode hold for this object since the intent is the server
4064  * is unexporting the specified directory.  Invalidation will prevent
4065  * this struct from being found in the future.
4066  */
4067 static void
4068 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4069 {
4070 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4071 	struct exportinfo *exi = (struct exportinfo *)e;
4072 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4073 	fhandle_t *efhp;
4074 
4075 	efhp = (fhandle_t *)&exi->exi_fh;
4076 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4077 
4078 	FH_TO_FMT4(efhp, exi_fhp);
4079 
4080 	finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4081 
4082 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4083 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4084 	    exi_fhp->fh4_xlen) == 0) {
4085 		if (fp->rf_vp) {
4086 			vnode_t *vp = fp->rf_vp;
4087 
4088 			/*
4089 			 * don't leak monitors and remove the reference
4090 			 * put on the vnode when the delegation was granted.
4091 			 */
4092 			if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4093 				(void) fem_uninstall(vp, deleg_rdops,
4094 				    (void *)fp);
4095 				vn_open_downgrade(vp, FREAD);
4096 			} else if (fp->rf_dinfo.rd_dtype ==
4097 			    OPEN_DELEGATE_WRITE) {
4098 				(void) fem_uninstall(vp, deleg_wrops,
4099 				    (void *)fp);
4100 				vn_open_downgrade(vp, FREAD|FWRITE);
4101 			}
4102 			mutex_enter(&vp->v_vsd_lock);
4103 			(void) vsd_set(vp, nfs4_srv_vkey, NULL);
4104 			mutex_exit(&vp->v_vsd_lock);
4105 			VN_RELE(vp);
4106 			fp->rf_vp = NULL;
4107 		}
4108 		rfs4_dbe_invalidate(fp->rf_dbe);
4109 	}
4110 }
4111 
4112 /*
4113  * Given a directory that is being unexported, cleanup/release all
4114  * state in the server that refers to objects residing underneath this
4115  * particular export.  The ordering of the release is important.
4116  * Lock_owner, then state and then file.
4117  *
4118  * NFS zones note: nfs_export.c:unexport() calls this from a
4119  * thread in the global zone for NGZ data structures, so we
4120  * CANNOT use zone_getspecific anywhere in this code path.
4121  */
4122 void
4123 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4124 {
4125 	nfs_globals_t *ng;
4126 	nfs4_srv_t *nsrv4;
4127 
4128 	ng = ne->ne_globals;
4129 	ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4130 	nsrv4 = ng->nfs4_srv;
4131 
4132 	mutex_enter(&nsrv4->state_lock);
4133 
4134 	if (nsrv4->nfs4_server_state == NULL) {
4135 		mutex_exit(&nsrv4->state_lock);
4136 		return;
4137 	}
4138 
4139 	rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4140 	    rfs4_lo_state_walk_callout, exi);
4141 	rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4142 	rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4143 	    rfs4_deleg_state_walk_callout, exi);
4144 	rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4145 
4146 	mutex_exit(&nsrv4->state_lock);
4147 }
4148