xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_state.c (revision d852f8ff587f58ad4c891927373169c1cec33ecf)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2018 Nexenta Systems, Inc.
28  * Copyright 2019 Nexenta by DDN, Inc.
29  * Copyright 2020 RackTop Systems, Inc.
30  * Copyright 2023 MNX Cloud, Inc.
31  */
32 
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/atomic.h>
37 #include <sys/clconf.h>
38 #include <sys/cladm.h>
39 #include <sys/flock.h>
40 #include <nfs/export.h>
41 #include <nfs/nfs.h>
42 #include <nfs/nfs4.h>
43 #include <nfs/nfssys.h>
44 #include <nfs/lm.h>
45 #include <sys/pathname.h>
46 #include <sys/sdt.h>
47 #include <sys/nvpair.h>
48 
/* Defined outside this file. */
extern u_longlong_t nfs4_srv_caller_id;

/* Defined outside this file. */
extern uint_t nfs4_srv_vkey;

/*
 * Special stateid values.  zero_stateid, one_stateid and current_stateid
 * are what the *_STATEID() macros below compare against.  Members that
 * are not named in an initializer are zero.
 */
stateid4 zero_stateid;		/* all zeros */
stateid4 one_stateid = {
	.seqid = ~0,
	.other = { ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }
};
stateid4 current_stateid = {
	.seqid = 1
};
stateid4 invalid_stateid = {
	.seqid = ~0
};

/*
 * Each macro is non-zero when the stateid4 that x points to is
 * byte-for-byte equal to the corresponding special stateid.
 */
#define	ZERO_STATEID(x) (!memcmp((x), &zero_stateid, sizeof (stateid4)))
#define	ONE_STATEID(x) (!memcmp((x), &one_stateid, sizeof (stateid4)))
#define	CURRENT_STATEID(x) (!memcmp((x), &current_stateid, sizeof (stateid4)))

/* For embedding the cluster nodeid into our clientid */
#define	CLUSTER_NODEID_SHIFT	24
#define	CLUSTER_MAX_NODEID	255

#ifdef DEBUG
int rfs4_debug;
#endif

rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
static uint32_t rfs4_database_debug = 0x00;

/* Forward declarations for the stable-storage helpers defined below. */
/* CSTYLED */
static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
85 
86 void
put_stateid4(struct compound_state * cs,stateid4 * state)87 put_stateid4(struct compound_state *cs, stateid4 *state)
88 {
89 	if (*cs->statusp == NFS4_OK && cs->minorversion) {
90 		memcpy(&cs->current_stateid, state, sizeof (stateid4));
91 		cs->cs_flags |= RFS4_CURRENT_STATEID;
92 	}
93 }
94 
95 void
get_stateid4(struct compound_state * cs,stateid4 * state)96 get_stateid4(struct compound_state *cs, stateid4 *state)
97 {
98 	if ((cs->cs_flags & RFS4_CURRENT_STATEID) && CURRENT_STATEID(state)) {
99 		memcpy(state, &cs->current_stateid, sizeof (stateid4));
100 	}
101 }
102 
103 /*
104  * Couple of simple init/destroy functions for a general waiter
105  */
106 void
rfs4_sw_init(rfs4_state_wait_t * swp)107 rfs4_sw_init(rfs4_state_wait_t *swp)
108 {
109 	mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
110 	cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
111 	swp->sw_active = FALSE;
112 	swp->sw_wait_count = 0;
113 }
114 
115 void
rfs4_sw_destroy(rfs4_state_wait_t * swp)116 rfs4_sw_destroy(rfs4_state_wait_t *swp)
117 {
118 	mutex_destroy(swp->sw_cv_lock);
119 	cv_destroy(swp->sw_cv);
120 }
121 
122 void
rfs4_sw_enter(rfs4_state_wait_t * swp)123 rfs4_sw_enter(rfs4_state_wait_t *swp)
124 {
125 	mutex_enter(swp->sw_cv_lock);
126 	while (swp->sw_active) {
127 		swp->sw_wait_count++;
128 		cv_wait(swp->sw_cv, swp->sw_cv_lock);
129 		swp->sw_wait_count--;
130 	}
131 	ASSERT(swp->sw_active == FALSE);
132 	swp->sw_active = TRUE;
133 	mutex_exit(swp->sw_cv_lock);
134 }
135 
136 void
rfs4_sw_exit(rfs4_state_wait_t * swp)137 rfs4_sw_exit(rfs4_state_wait_t *swp)
138 {
139 	mutex_enter(swp->sw_cv_lock);
140 	ASSERT(swp->sw_active == TRUE);
141 	swp->sw_active = FALSE;
142 	if (swp->sw_wait_count != 0)
143 		cv_broadcast(swp->sw_cv);
144 	mutex_exit(swp->sw_cv_lock);
145 }
146 
147 static void
deep_lock_copy(LOCK4res * dres,LOCK4res * sres)148 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
149 {
150 	lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
151 	lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
152 
153 	if (sres->status == NFS4ERR_DENIED) {
154 		dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
155 		bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
156 	}
157 }
158 
/*
 * CPR callback id -- not related to v4 callbacks
 *
 * NOTE(review): presumably filled in when the CPR callback is registered
 * and used later to remove it -- confirm against the callb_add() /
 * callb_delete() call sites.
 */
static callb_id_t cpr_id = 0;
163 
164 static void
deep_lock_free(LOCK4res * res)165 deep_lock_free(LOCK4res *res)
166 {
167 	lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
168 
169 	if (res->status == NFS4ERR_DENIED)
170 		kmem_free(lo->owner_val, lo->owner_len);
171 }
172 
173 static void
deep_open_copy(OPEN4res * dres,OPEN4res * sres)174 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
175 {
176 	nfsace4 *sacep, *dacep;
177 
178 	if (sres->status != NFS4_OK) {
179 		return;
180 	}
181 
182 	dres->attrset = sres->attrset;
183 
184 	switch (sres->delegation.delegation_type) {
185 	case OPEN_DELEGATE_NONE:
186 		return;
187 	case OPEN_DELEGATE_READ:
188 		sacep = &sres->delegation.open_delegation4_u.read.permissions;
189 		dacep = &dres->delegation.open_delegation4_u.read.permissions;
190 		break;
191 	case OPEN_DELEGATE_WRITE:
192 		sacep = &sres->delegation.open_delegation4_u.write.permissions;
193 		dacep = &dres->delegation.open_delegation4_u.write.permissions;
194 		break;
195 	}
196 	dacep->who.utf8string_val =
197 	    kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
198 	bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
199 	    sacep->who.utf8string_len);
200 }
201 
202 static void
deep_open_free(OPEN4res * res)203 deep_open_free(OPEN4res *res)
204 {
205 	nfsace4 *acep;
206 	if (res->status != NFS4_OK)
207 		return;
208 
209 	switch (res->delegation.delegation_type) {
210 	case OPEN_DELEGATE_NONE:
211 		return;
212 	case OPEN_DELEGATE_READ:
213 		acep = &res->delegation.open_delegation4_u.read.permissions;
214 		break;
215 	case OPEN_DELEGATE_WRITE:
216 		acep = &res->delegation.open_delegation4_u.write.permissions;
217 		break;
218 	}
219 
220 	if (acep->who.utf8string_val) {
221 		kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
222 		acep->who.utf8string_val = NULL;
223 	}
224 }
225 
226 void
rfs4_free_reply(nfs_resop4 * rp)227 rfs4_free_reply(nfs_resop4 *rp)
228 {
229 	switch (rp->resop) {
230 	case OP_LOCK:
231 		deep_lock_free(&rp->nfs_resop4_u.oplock);
232 		break;
233 	case OP_OPEN:
234 		deep_open_free(&rp->nfs_resop4_u.opopen);
235 	default:
236 		break;
237 	}
238 }
239 
240 void
rfs4_copy_reply(nfs_resop4 * dst,nfs_resop4 * src)241 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
242 {
243 	*dst = *src;
244 
245 	/* Handle responses that need deep copy */
246 	switch (src->resop) {
247 	case OP_LOCK:
248 		deep_lock_copy(&dst->nfs_resop4_u.oplock,
249 		    &src->nfs_resop4_u.oplock);
250 		break;
251 	case OP_OPEN:
252 		deep_open_copy(&dst->nfs_resop4_u.opopen,
253 		    &src->nfs_resop4_u.opopen);
254 		break;
255 	default:
256 		break;
257 	};
258 }
259 
260 /*
261  * This is the implementation of the underlying state engine. The
262  * public interface to this engine is described by
263  * nfs4_state.h. Callers to the engine should hold no state engine
264  * locks when they call in to it. If the protocol needs to lock data
265  * structures it should do so after acquiring all references to them
266  * first and then follow the following lock order:
267  *
268  *	client > openowner > state > lo_state > lockowner > file.
269  *
270  * Internally we only allow a thread to hold one hash bucket lock at a
271  * time and the lock is higher in the lock order (must be acquired
272  * first) than the data structure that is on that hash list.
273  *
274  * If a new reference was acquired by the caller, that reference needs
275  * to be released after releasing all acquired locks with the
276  * corresponding rfs4_*_rele routine.
277  */
278 
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizeable tables
 * backed by the kmem_cache allocator. However synchronization is handled
 * correctly (I hope) and will not change by much.  The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
290 
/* Table size constant: 17 on DEBUG builds, 2047 otherwise. */
#ifdef DEBUG
#define	TABSIZE 17
#else
#define	TABSIZE 2047
#endif

/* Hash a pointer-valued key by discarding its low three bits. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)

/*
 * Parenthesized so the product keeps its value when the macro is used
 * next to operators such as '/' or '%'.
 */
#define	MAXTABSZ (1024 * 1024)

/* The values below are rfs4_lease_time units */

#ifdef DEBUG
#define	CLIENT_CACHE_TIME 1
#define	OPENOWNER_CACHE_TIME 1
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 1
#define	FILE_CACHE_TIME 3
#define	DELEG_STATE_CACHE_TIME 1
#else
#define	CLIENT_CACHE_TIME 10
#define	OPENOWNER_CACHE_TIME 5
#define	STATE_CACHE_TIME 1
#define	LO_STATE_CACHE_TIME 1
#define	LOCKOWNER_CACHE_TIME 3
#define	FILE_CACHE_TIME 40
#define	DELEG_STATE_CACHE_TIME 1
#endif
320 
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global, the tables that make use of these
 * are per zone.
 */
kmem_cache_t *rfs4_client_mem_cache;
kmem_cache_t *rfs4_clntIP_mem_cache;
kmem_cache_t *rfs4_openown_mem_cache;
kmem_cache_t *rfs4_openstID_mem_cache;
kmem_cache_t *rfs4_lockstID_mem_cache;
kmem_cache_t *rfs4_lockown_mem_cache;
kmem_cache_t *rfs4_file_mem_cache;
kmem_cache_t *rfs4_delegstID_mem_cache;
kmem_cache_t *rfs4_session_mem_cache;
337 
/*
 * NFSv4 state table functions
 *
 * Forward declarations only; the definitions appear later in the file.
 * The group headings below simply follow the function-name prefixes.
 */

/* client entries, plus the clientid and nfsclnt hash/compare/mkkey sets */
static bool_t rfs4_client_create(rfs4_entry_t, void *);
static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
static void rfs4_client_destroy(rfs4_entry_t);
static bool_t rfs4_client_expiry(rfs4_entry_t);
static uint32_t clientid_hash(void *);
static bool_t clientid_compare(rfs4_entry_t, void *);
static void *clientid_mkkey(rfs4_entry_t);
static uint32_t nfsclnt_hash(void *);
static bool_t nfsclnt_compare(rfs4_entry_t, void *);
static void *nfsclnt_mkkey(rfs4_entry_t);

/* clntip entries */
static bool_t rfs4_clntip_expiry(rfs4_entry_t);
static void rfs4_clntip_destroy(rfs4_entry_t);
static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
static uint32_t clntip_hash(void *);
static bool_t clntip_compare(rfs4_entry_t, void *);
static void *clntip_mkkey(rfs4_entry_t);

/* openowner entries */
static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
static void rfs4_openowner_destroy(rfs4_entry_t);
static bool_t rfs4_openowner_expiry(rfs4_entry_t);
static uint32_t openowner_hash(void *);
static bool_t openowner_compare(rfs4_entry_t, void *);
static void *openowner_mkkey(rfs4_entry_t);

/* state entries, plus the state_owner_file and state_file sets */
static bool_t rfs4_state_create(rfs4_entry_t, void *);
static void rfs4_state_destroy(rfs4_entry_t);
static bool_t rfs4_state_expiry(rfs4_entry_t);
static uint32_t state_hash(void *);
static bool_t state_compare(rfs4_entry_t, void *);
static void *state_mkkey(rfs4_entry_t);
static uint32_t state_owner_file_hash(void *);
static bool_t state_owner_file_compare(rfs4_entry_t, void *);
static void *state_owner_file_mkkey(rfs4_entry_t);
static uint32_t state_file_hash(void *);
static bool_t state_file_compare(rfs4_entry_t, void *);
static void *state_file_mkkey(rfs4_entry_t);

/* lo_state entries, plus the lo_state_lo set */
static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
static void rfs4_lo_state_destroy(rfs4_entry_t);
static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
static uint32_t lo_state_hash(void *);
static bool_t lo_state_compare(rfs4_entry_t, void *);
static void *lo_state_mkkey(rfs4_entry_t);
static uint32_t lo_state_lo_hash(void *);
static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
static void *lo_state_lo_mkkey(rfs4_entry_t);

/* lockowner entries, plus the pid set */
static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
static void rfs4_lockowner_destroy(rfs4_entry_t);
static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
static uint32_t lockowner_hash(void *);
static bool_t lockowner_compare(rfs4_entry_t, void *);
static void *lockowner_mkkey(rfs4_entry_t);
static uint32_t pid_hash(void *);
static bool_t pid_compare(rfs4_entry_t, void *);
static void *pid_mkkey(rfs4_entry_t);

/* file entries (no expiry callback is declared for these) */
static bool_t rfs4_file_create(rfs4_entry_t, void *);
static void rfs4_file_destroy(rfs4_entry_t);
static uint32_t file_hash(void *);
static bool_t file_compare(rfs4_entry_t, void *);
static void *file_mkkey(rfs4_entry_t);

/* deleg_state entries, plus the deleg and deleg_state sets */
static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
static void rfs4_deleg_state_destroy(rfs4_entry_t);
static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
static uint32_t deleg_hash(void *);
static bool_t deleg_compare(rfs4_entry_t, void *);
static void *deleg_mkkey(rfs4_entry_t);
static uint32_t deleg_state_hash(void *);
static bool_t deleg_state_compare(rfs4_entry_t, void *);
static void *deleg_state_mkkey(rfs4_entry_t);

/*
 * Set to 1 by rfs4_ss_init(); while it is 0, rfs4_ss_clid() returns
 * without writing anything to stable storage.
 */
static int rfs4_ss_enabled = 0;
410 
411 void
rfs4_ss_pnfree(rfs4_ss_pn_t * ss_pn)412 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
413 {
414 	kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
415 }
416 
417 static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char * dir,char * leaf)418 rfs4_ss_pnalloc(char *dir, char *leaf)
419 {
420 	rfs4_ss_pn_t *ss_pn;
421 	int dir_len, leaf_len;
422 
423 	/*
424 	 * validate we have a resonable path
425 	 * (account for the '/' and trailing null)
426 	 */
427 	if ((dir_len = strlen(dir)) > MAXPATHLEN ||
428 	    (leaf_len = strlen(leaf)) > MAXNAMELEN ||
429 	    (dir_len + leaf_len + 2) > MAXPATHLEN) {
430 		return (NULL);
431 	}
432 
433 	ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
434 
435 	(void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
436 	/* Handy pointer to just the leaf name */
437 	ss_pn->leaf = ss_pn->pn + dir_len + 1;
438 	return (ss_pn);
439 }
440 
441 
442 /*
443  * Move the "leaf" filename from "sdir" directory
444  * to the "ddir" directory. Return the pathname of
445  * the destination unless the rename fails in which
446  * case we need to return the source pathname.
447  */
448 static rfs4_ss_pn_t *
rfs4_ss_movestate(char * sdir,char * ddir,char * leaf)449 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
450 {
451 	rfs4_ss_pn_t *src, *dst;
452 
453 	if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
454 		return (NULL);
455 
456 	if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
457 		rfs4_ss_pnfree(src);
458 		return (NULL);
459 	}
460 
461 	/*
462 	 * If the rename fails we shall return the src
463 	 * pathname and free the dst. Otherwise we need
464 	 * to free the src and return the dst pathanme.
465 	 */
466 	if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
467 		rfs4_ss_pnfree(dst);
468 		return (src);
469 	}
470 	rfs4_ss_pnfree(src);
471 	return (dst);
472 }
473 
474 
475 static rfs4_oldstate_t *
rfs4_ss_getstate(vnode_t * dvp,rfs4_ss_pn_t * ss_pn)476 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
477 {
478 	struct uio uio;
479 	struct iovec iov[3];
480 
481 	rfs4_oldstate_t *cl_ss = NULL;
482 	vnode_t *vp;
483 	vattr_t va;
484 	uint_t id_len;
485 	int err, kill_file, file_vers;
486 
487 	if (ss_pn == NULL)
488 		return (NULL);
489 
490 	/*
491 	 * open the state file.
492 	 */
493 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
494 		return (NULL);
495 	}
496 
497 	if (vp->v_type != VREG) {
498 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
499 		VN_RELE(vp);
500 		return (NULL);
501 	}
502 
503 	err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
504 	if (err) {
505 		/*
506 		 * We don't have read access? better get the heck out.
507 		 */
508 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
509 		VN_RELE(vp);
510 		return (NULL);
511 	}
512 
513 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
514 	/*
515 	 * get the file size to do some basic validation
516 	 */
517 	va.va_mask = AT_SIZE;
518 	err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
519 
520 	kill_file = (va.va_size == 0 || va.va_size <
521 	    (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
522 
523 	if (err || kill_file) {
524 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
525 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
526 		VN_RELE(vp);
527 		if (kill_file) {
528 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
529 		}
530 		return (NULL);
531 	}
532 
533 	cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
534 
535 	/*
536 	 * build iovecs to read in the file_version, verifier and id_len
537 	 */
538 	iov[0].iov_base = (caddr_t)&file_vers;
539 	iov[0].iov_len = sizeof (int);
540 	iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
541 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
542 	iov[2].iov_base = (caddr_t)&id_len;
543 	iov[2].iov_len = sizeof (uint_t);
544 
545 	uio.uio_iov = iov;
546 	uio.uio_iovcnt = 3;
547 	uio.uio_segflg = UIO_SYSSPACE;
548 	uio.uio_loffset = 0;
549 	uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
550 
551 	err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
552 	if (err != 0) {
553 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
554 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
555 		VN_RELE(vp);
556 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
557 		return (NULL);
558 	}
559 
560 	/*
561 	 * if the file_version doesn't match or if the
562 	 * id_len is zero or the combination of the verifier,
563 	 * id_len and id_val is bigger than the file we have
564 	 * a problem. If so ditch the file.
565 	 */
566 	kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
567 	    (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
568 
569 	if (err || kill_file) {
570 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
571 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
572 		VN_RELE(vp);
573 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
574 		if (kill_file) {
575 			(void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
576 		}
577 		return (NULL);
578 	}
579 
580 	/*
581 	 * now get the client id value
582 	 */
583 	cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
584 	iov[0].iov_base = cl_ss->cl_id4.id_val;
585 	iov[0].iov_len = id_len;
586 
587 	uio.uio_iov = iov;
588 	uio.uio_iovcnt = 1;
589 	uio.uio_segflg = UIO_SYSSPACE;
590 	uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
591 
592 	err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
593 	if (err != 0) {
594 		VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
595 		(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
596 		VN_RELE(vp);
597 		kmem_free(cl_ss->cl_id4.id_val, id_len);
598 		kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
599 		return (NULL);
600 	}
601 
602 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
603 	(void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
604 	VN_RELE(vp);
605 	return (cl_ss);
606 }
607 
#ifdef	nextdp
#undef nextdp
#endif
/* Step from dp to the dirent64 that starts d_reclen bytes after it. */
#define	nextdp(dp)	((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
612 
613 /*
614  * Check whether list already contains the client
615  * This protects against counting the same client twice.
616  */
617 static bool_t
rfs4_ss_has_client(rfs4_oldstate_t * head,nfs_client_id4 * client)618 rfs4_ss_has_client(rfs4_oldstate_t *head, nfs_client_id4 *client)
619 {
620 	rfs4_oldstate_t *p;
621 
622 	for (p = head->next; p != head; p = p->next) {
623 		nfs_client_id4 *m = &p->cl_id4;
624 
625 		if (m->id_len != client->id_len)
626 			continue;
627 
628 		if (bcmp(m->id_val, client->id_val, client->id_len) == 0)
629 			continue;
630 
631 		/* client ids match */
632 		return (TRUE);
633 	}
634 
635 	return (FALSE);
636 }
637 
638 /*
639  * Add entries from statedir to supplied oldstate list.
640  * Optionally, move all entries from statedir -> destdir.
641  */
642 static void
rfs4_ss_oldstate(rfs4_oldstate_t * oldstate,char * statedir,char * destdir)643 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
644 {
645 	rfs4_ss_pn_t *ss_pn;
646 	rfs4_oldstate_t *cl_ss = NULL;
647 	char	*dirt = NULL;
648 	int	err, dir_eof = 0, size = 0;
649 	vnode_t *dvp;
650 	struct iovec iov;
651 	struct uio uio;
652 	struct dirent64 *dep;
653 	offset_t dirchunk_offset = 0;
654 	unsigned int nclients = 0;
655 
656 	/*
657 	 * open the state directory
658 	 */
659 	if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
660 		return;
661 
662 	if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
663 		goto out;
664 
665 	dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
666 
667 	/*
668 	 * Get and process the directory entries
669 	 */
670 	while (!dir_eof) {
671 		(void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
672 		iov.iov_base = dirt;
673 		iov.iov_len = RFS4_SS_DIRSIZE;
674 		uio.uio_iov = &iov;
675 		uio.uio_iovcnt = 1;
676 		uio.uio_segflg = UIO_SYSSPACE;
677 		uio.uio_loffset = dirchunk_offset;
678 		uio.uio_resid = RFS4_SS_DIRSIZE;
679 
680 		err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
681 		VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
682 		if (err)
683 			goto out;
684 
685 		size = RFS4_SS_DIRSIZE - uio.uio_resid;
686 
687 		/*
688 		 * Process all the directory entries in this
689 		 * readdir chunk
690 		 */
691 		for (dep = (struct dirent64 *)dirt; size > 0;
692 		    dep = nextdp(dep)) {
693 
694 			size -= dep->d_reclen;
695 			dirchunk_offset = dep->d_off;
696 
697 			/*
698 			 * Skip '.' and '..'
699 			 */
700 			if (NFS_IS_DOTNAME(dep->d_name))
701 				continue;
702 
703 			ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
704 			if (ss_pn == NULL)
705 				continue;
706 
707 			cl_ss = rfs4_ss_getstate(dvp, ss_pn);
708 			if (cl_ss != NULL) {
709 				if (destdir != NULL) {
710 					rfs4_ss_pnfree(ss_pn);
711 					cl_ss->ss_pn = rfs4_ss_movestate(
712 					    statedir, destdir, dep->d_name);
713 				} else {
714 					cl_ss->ss_pn = ss_pn;
715 				}
716 
717 				if (!rfs4_ss_has_client(oldstate,
718 				    &cl_ss->cl_id4))
719 					nclients++;
720 
721 				insque(cl_ss, oldstate);
722 			} else {
723 				rfs4_ss_pnfree(ss_pn);
724 			}
725 		}
726 	}
727 
728 out:
729 	(void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
730 	VN_RELE(dvp);
731 	if (dirt)
732 		kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
733 
734 	if (nclients > 0) {
735 		nfs4_srv_t *nsrv4 = nfs4_get_srv();
736 
737 		atomic_add_32(&(nsrv4->nfs4_cur_servinst->nreclaim), nclients);
738 	}
739 }
740 
741 static void
rfs4_ss_init(nfs4_srv_t * nsrv4)742 rfs4_ss_init(nfs4_srv_t *nsrv4)
743 {
744 	int npaths = 1;
745 	char *default_dss_path = NFS4_DSS_VAR_DIR;
746 
747 	/* read the default stable storage state */
748 	rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
749 
750 	rfs4_ss_enabled = 1;
751 }
752 
753 static void
rfs4_ss_fini(nfs4_srv_t * nsrv4)754 rfs4_ss_fini(nfs4_srv_t *nsrv4)
755 {
756 	rfs4_servinst_t *sip;
757 
758 	mutex_enter(&nsrv4->servinst_lock);
759 	sip = nsrv4->nfs4_cur_servinst;
760 	while (sip != NULL) {
761 		rfs4_dss_clear_oldstate(sip);
762 		sip = sip->next;
763 	}
764 	mutex_exit(&nsrv4->servinst_lock);
765 }
766 
767 /*
768  * Remove all oldstate files referenced by this servinst.
769  */
770 static void
rfs4_dss_clear_oldstate(rfs4_servinst_t * sip)771 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
772 {
773 	rfs4_oldstate_t *os_head, *osp;
774 
775 	rw_enter(&sip->oldstate_lock, RW_WRITER);
776 	os_head = sip->oldstate;
777 
778 	if (os_head == NULL) {
779 		rw_exit(&sip->oldstate_lock);
780 		return;
781 	}
782 
783 	/* skip dummy entry */
784 	osp = os_head->next;
785 	while (osp != os_head) {
786 		char *leaf = osp->ss_pn->leaf;
787 		rfs4_oldstate_t *os_next;
788 
789 		rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
790 
791 		if (osp->cl_id4.id_val)
792 			kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
793 		rfs4_ss_pnfree(osp->ss_pn);
794 
795 		os_next = osp->next;
796 		remque(osp);
797 		kmem_free(osp, sizeof (rfs4_oldstate_t));
798 		osp = os_next;
799 	}
800 
801 	rw_exit(&sip->oldstate_lock);
802 }
803 
804 /*
805  * Form the state and oldstate paths, and read in the stable storage files.
806  */
807 void
rfs4_dss_readstate(nfs4_srv_t * nsrv4,int npaths,char ** paths)808 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
809 {
810 	int i;
811 	char *state, *oldstate;
812 
813 	state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
814 	oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
815 
816 	for (i = 0; i < npaths; i++) {
817 		char *path = paths[i];
818 
819 		(void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
820 		(void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
821 
822 		/*
823 		 * Populate the current server instance's oldstate list.
824 		 *
825 		 * 1. Read stable storage data from old state directory,
826 		 *    leaving its contents alone.
827 		 *
828 		 * 2. Read stable storage data from state directory,
829 		 *    and move the latter's contents to old state
830 		 *    directory.
831 		 */
832 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
833 		    oldstate, NULL);
834 		rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
835 		    state, oldstate);
836 	}
837 
838 	kmem_free(state, MAXPATHLEN);
839 	kmem_free(oldstate, MAXPATHLEN);
840 }
841 
842 
843 /*
844  * Check if we are still in grace and if the client can be
845  * granted permission to perform reclaims.
846  */
847 void
rfs4_ss_chkclid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)848 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
849 {
850 	rfs4_servinst_t *sip;
851 
852 	/*
853 	 * It should be sufficient to check the oldstate data for just
854 	 * this client's instance. However, since our per-instance
855 	 * client grouping is solely temporal, HA-NFSv4 RG failover
856 	 * might result in clients of the same RG being partitioned into
857 	 * separate instances.
858 	 *
859 	 * Until the client grouping is improved, we must check the
860 	 * oldstate data for all instances with an active grace period.
861 	 *
862 	 * This also serves as the mechanism to remove stale oldstate data.
863 	 * The first time we check an instance after its grace period has
864 	 * expired, the oldstate data should be cleared.
865 	 *
866 	 * Start at the current instance, and walk the list backwards
867 	 * to the first.
868 	 */
869 	mutex_enter(&nsrv4->servinst_lock);
870 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
871 		rfs4_ss_chkclid_sip(cp, sip);
872 
873 		/* if the above check found this client, we're done */
874 		if (cp->rc_can_reclaim)
875 			break;
876 	}
877 	mutex_exit(&nsrv4->servinst_lock);
878 }
879 
880 static void
rfs4_ss_chkclid_sip(rfs4_client_t * cp,rfs4_servinst_t * sip)881 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
882 {
883 	rfs4_oldstate_t *osp, *os_head;
884 
885 	/* short circuit everything if this server instance has no oldstate */
886 	rw_enter(&sip->oldstate_lock, RW_READER);
887 	os_head = sip->oldstate;
888 	rw_exit(&sip->oldstate_lock);
889 	if (os_head == NULL)
890 		return;
891 
892 	/*
893 	 * If this server instance is no longer in a grace period then
894 	 * the client won't be able to reclaim. No further need for this
895 	 * instance's oldstate data, so it can be cleared.
896 	 */
897 	if (!rfs4_servinst_in_grace(sip))
898 		return;
899 
900 	/* this instance is still in grace; search for the clientid */
901 
902 	rw_enter(&sip->oldstate_lock, RW_READER);
903 
904 	os_head = sip->oldstate;
905 	/* skip dummy entry */
906 	osp = os_head->next;
907 	while (osp != os_head) {
908 		if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
909 			if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
910 			    osp->cl_id4.id_len) == 0) {
911 				cp->rc_can_reclaim = 1;
912 				break;
913 			}
914 		}
915 		osp = osp->next;
916 	}
917 
918 	rw_exit(&sip->oldstate_lock);
919 }
920 
921 /*
922  * Place client information into stable storage: 1/3.
923  * First, generate the leaf filename, from the client's IP address and
924  * the server-generated short-hand clientid.
925  */
926 void
rfs4_ss_clid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)927 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
928 {
929 	const char *kinet_ntop6(uchar_t *, char *, size_t);
930 	char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
931 	struct sockaddr *ca;
932 	uchar_t *b;
933 
934 	if (rfs4_ss_enabled == 0) {
935 		return;
936 	}
937 
938 	buf[0] = 0;
939 
940 	ca = (struct sockaddr *)&cp->rc_addr;
941 
942 	/*
943 	 * Convert the caller's IP address to a dotted string
944 	 */
945 	if (ca->sa_family == AF_INET) {
946 		b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
947 		(void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
948 		    b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
949 	} else if (ca->sa_family == AF_INET6) {
950 		struct sockaddr_in6 *sin6;
951 
952 		sin6 = (struct sockaddr_in6 *)ca;
953 		(void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
954 		    buf, INET6_ADDRSTRLEN);
955 	}
956 
957 	(void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
958 	    (longlong_t)cp->rc_clientid);
959 	rfs4_ss_clid_write(nsrv4, cp, leaf);
960 }
961 
962 /*
963  * Place client information into stable storage: 2/3.
964  * DSS: distributed stable storage: the file may need to be written to
965  * multiple directories.
966  */
967 static void
rfs4_ss_clid_write(nfs4_srv_t * nsrv4,rfs4_client_t * cp,char * leaf)968 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
969 {
970 	rfs4_servinst_t *sip;
971 
972 	/*
973 	 * It should be sufficient to write the leaf file to (all) DSS paths
974 	 * associated with just this client's instance. However, since our
975 	 * per-instance client grouping is solely temporal, HA-NFSv4 RG
976 	 * failover might result in us losing DSS data.
977 	 *
978 	 * Until the client grouping is improved, we must write the DSS data
979 	 * to all instances' paths. Start at the current instance, and
980 	 * walk the list backwards to the first.
981 	 */
982 	mutex_enter(&nsrv4->servinst_lock);
983 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
984 		int i, npaths = sip->dss_npaths;
985 
986 		/* write the leaf file to all DSS paths */
987 		for (i = 0; i < npaths; i++) {
988 			rfs4_dss_path_t *dss_path = sip->dss_paths[i];
989 
990 			/* HA-NFSv4 path might have been failed-away from us */
991 			if (dss_path == NULL)
992 				continue;
993 
994 			rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
995 		}
996 	}
997 	mutex_exit(&nsrv4->servinst_lock);
998 }
999 
1000 /*
1001  * Place client information into stable storage: 3/3.
1002  * Write the stable storage data to the requested file.
1003  */
1004 static void
rfs4_ss_clid_write_one(rfs4_client_t * cp,char * dss_path,char * leaf)1005 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
1006 {
1007 	int ioflag;
1008 	int file_vers = NFS4_SS_VERSION;
1009 	size_t dirlen;
1010 	struct uio uio;
1011 	struct iovec iov[4];
1012 	char *dir;
1013 	rfs4_ss_pn_t *ss_pn;
1014 	vnode_t *vp;
1015 	nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
1016 
1017 	/* allow 2 extra bytes for '/' & NUL */
1018 	dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
1019 	dir = kmem_alloc(dirlen, KM_SLEEP);
1020 	(void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
1021 
1022 	ss_pn = rfs4_ss_pnalloc(dir, leaf);
1023 	/* rfs4_ss_pnalloc takes its own copy */
1024 	kmem_free(dir, dirlen);
1025 	if (ss_pn == NULL)
1026 		return;
1027 
1028 	if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
1029 	    CRCREAT, 0)) {
1030 		rfs4_ss_pnfree(ss_pn);
1031 		return;
1032 	}
1033 
1034 	/*
1035 	 * We need to record leaf - i.e. the filename - so that we know
1036 	 * what to remove, in the future. However, the dir part of cp->ss_pn
1037 	 * should never be referenced directly, since it's potentially only
1038 	 * one of several paths with this leaf in it.
1039 	 */
1040 	if (cp->rc_ss_pn != NULL) {
1041 		if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
1042 			/* we've already recorded *this* leaf */
1043 			rfs4_ss_pnfree(ss_pn);
1044 		} else {
1045 			/* replace with this leaf */
1046 			rfs4_ss_pnfree(cp->rc_ss_pn);
1047 			cp->rc_ss_pn = ss_pn;
1048 		}
1049 	} else {
1050 		cp->rc_ss_pn = ss_pn;
1051 	}
1052 
1053 	/*
1054 	 * Build a scatter list that points to the nfs_client_id4
1055 	 */
1056 	iov[0].iov_base = (caddr_t)&file_vers;
1057 	iov[0].iov_len = sizeof (int);
1058 	iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1059 	iov[1].iov_len = NFS4_VERIFIER_SIZE;
1060 	iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1061 	iov[2].iov_len = sizeof (uint_t);
1062 	iov[3].iov_base = (caddr_t)cl_id4->id_val;
1063 	iov[3].iov_len = cl_id4->id_len;
1064 
1065 	uio.uio_iov = iov;
1066 	uio.uio_iovcnt = 4;
1067 	uio.uio_loffset = 0;
1068 	uio.uio_segflg = UIO_SYSSPACE;
1069 	uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1070 	uio.uio_resid = cl_id4->id_len + sizeof (int) +
1071 	    NFS4_VERIFIER_SIZE + sizeof (uint_t);
1072 
1073 	ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1074 	uio.uio_extflg = UIO_COPY_DEFAULT;
1075 
1076 	(void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1077 	/* write the full client id to the file. */
1078 	(void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1079 	VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1080 
1081 	(void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1082 	VN_RELE(vp);
1083 }
1084 
1085 /*
1086  * DSS: distributed stable storage.
1087  * Unpack the list of paths passed by nfsd.
1088  * Use nvlist_alloc(9F) to manage the data.
1089  * The caller is responsible for allocating and freeing the buffer.
1090  */
1091 int
rfs4_dss_setpaths(char * buf,size_t buflen)1092 rfs4_dss_setpaths(char *buf, size_t buflen)
1093 {
1094 	int error;
1095 
1096 	/*
1097 	 * If this is a "warm start", i.e. we previously had DSS paths,
1098 	 * preserve the old paths.
1099 	 */
1100 	if (rfs4_dss_paths != NULL) {
1101 		/*
1102 		 * Before we lose the ptr, destroy the nvlist and pathnames
1103 		 * array from the warm start before this one.
1104 		 */
1105 		nvlist_free(rfs4_dss_oldpaths);
1106 		rfs4_dss_oldpaths = rfs4_dss_paths;
1107 	}
1108 
1109 	/* unpack the buffer into a searchable nvlist */
1110 	error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1111 	if (error)
1112 		return (error);
1113 
1114 	/*
1115 	 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1116 	 * in the list, and record its location.
1117 	 */
1118 	error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1119 	    &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1120 	return (error);
1121 }
1122 
1123 /*
1124  * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1125  * to find and mark the client for forced expire.
1126  */
1127 static void
rfs4_client_scrub(rfs4_entry_t ent,void * arg)1128 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1129 {
1130 	rfs4_client_t *cp = (rfs4_client_t *)ent;
1131 	struct nfs4clrst_args *clr = arg;
1132 	struct sockaddr_in6 *ent_sin6;
1133 	struct in6_addr  clr_in6;
1134 	struct sockaddr_in  *ent_sin;
1135 	struct in_addr   clr_in;
1136 
1137 	if (clr->addr_type != cp->rc_addr.ss_family) {
1138 		return;
1139 	}
1140 
1141 	switch (clr->addr_type) {
1142 
1143 	case AF_INET6:
1144 		/* copyin the address from user space */
1145 		if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1146 			break;
1147 		}
1148 
1149 		ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1150 
1151 		/*
1152 		 * now compare, and if equivalent mark entry
1153 		 * for forced expiration
1154 		 */
1155 		if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1156 			cp->rc_forced_expire = 1;
1157 		}
1158 		break;
1159 
1160 	case AF_INET:
1161 		/* copyin the address from user space */
1162 		if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1163 			break;
1164 		}
1165 
1166 		ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1167 
1168 		/*
1169 		 * now compare, and if equivalent mark entry
1170 		 * for forced expiration
1171 		 */
1172 		if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1173 			cp->rc_forced_expire = 1;
1174 		}
1175 		break;
1176 
1177 	default:
1178 		/* force this assert to fail */
1179 		ASSERT(clr->addr_type != clr->addr_type);
1180 	}
1181 }
1182 
1183 /*
1184  * This is called from nfssys() in order to clear server state
1185  * for the specified client IP Address.
1186  */
1187 int
rfs4_clear_client_state(struct nfs4clrst_args * clr)1188 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1189 {
1190 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1191 	int rc;
1192 
1193 	/* Once nfssrv is loaded, every zone should have one of these. */
1194 	VERIFY(nsrv4 != NULL);
1195 
1196 	mutex_enter(&nsrv4->state_lock);
1197 	/*
1198 	 * But only after NFS service is running is the nfs4_server_state
1199 	 * around. It's dirty (and needs the state_lock held), but all of the
1200 	 * databases live deep in the nfs4_server_state, so it's the only thing
1201 	 * to legitimately check prior to using anything. The pointers
1202 	 * themselves may be stale.
1203 	 */
1204 	if (nsrv4->nfs4_server_state != NULL) {
1205 		VERIFY(nsrv4->rfs4_client_tab != NULL);
1206 		rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1207 		rc = 0;
1208 	} else {
1209 		rc = ENXIO;
1210 	}
1211 	mutex_exit(&nsrv4->state_lock);
1212 	return (rc);
1213 }
1214 
1215 /*
1216  * Used to initialize the NFSv4 server's state or database.  All of
1217  * the tables are created and timers are set.
1218  */
1219 void
rfs4_state_g_init(void)1220 rfs4_state_g_init(void)
1221 {
1222 	extern boolean_t rfs4_cpr_callb(void *, int);
1223 	/*
1224 	 * Add a CPR callback so that we can update client
1225 	 * access times to extend the lease after a suspend
1226 	 * and resume (using the same class as rpcmod/connmgr)
1227 	 */
1228 	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1229 
1230 	/*
1231 	 * NFSv4 server state databases
1232 	 *
1233 	 * Initialized when the module is loaded and used by NFSv4 state
1234 	 * tables.  These kmem_cache free pools are used globally, the NFSv4
1235 	 * state tables which make use of these kmem_cache free pools are per
1236 	 * zone.
1237 	 *
1238 	 * initialize the global kmem_cache free pools which will be used by
1239 	 * the NFSv4 state tables.
1240 	 */
1241 	rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache",
1242 	    2, sizeof (rfs4_client_t), 0);
1243 	rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache",
1244 	    1, sizeof (rfs4_clntip_t), 1);
1245 	rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache",
1246 	    1, sizeof (rfs4_openowner_t), 2);
1247 	rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache",
1248 	    3, sizeof (rfs4_state_t), 3);
1249 	rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache",
1250 	    3, sizeof (rfs4_lo_state_t), 4);
1251 	rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache",
1252 	    2, sizeof (rfs4_lockowner_t), 5);
1253 	rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache",
1254 	    1, sizeof (rfs4_file_t), 6);
1255 	rfs4_delegstID_mem_cache =
1256 	    nfs4_init_mem_cache("DelegStateID_entry_cache", 2,
1257 	    sizeof (rfs4_deleg_state_t), 7);
1258 	rfs4_session_mem_cache = nfs4_init_mem_cache("Session_entry_cache",
1259 	    1, sizeof (rfs4_session_t), 8);
1260 }
1261 
1262 
1263 /*
1264  * Used at server shutdown to cleanup all of the NFSv4 server's structures
1265  * and other state.
1266  */
1267 void
rfs4_state_g_fini(void)1268 rfs4_state_g_fini(void)
1269 {
1270 	int i;
1271 	/*
1272 	 * Cleanup the CPR callback.
1273 	 */
1274 	if (cpr_id)
1275 		(void) callb_delete(cpr_id);
1276 
1277 	/* free the NFSv4 state databases */
1278 	for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1279 		kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1280 		rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1281 	}
1282 
1283 	rfs4_client_mem_cache = NULL;
1284 	rfs4_clntIP_mem_cache = NULL;
1285 	rfs4_openown_mem_cache = NULL;
1286 	rfs4_openstID_mem_cache = NULL;
1287 	rfs4_lockstID_mem_cache = NULL;
1288 	rfs4_lockown_mem_cache = NULL;
1289 	rfs4_file_mem_cache = NULL;
1290 	rfs4_delegstID_mem_cache = NULL;
1291 	rfs4_session_mem_cache = NULL;
1292 
1293 	/* DSS: distributed stable storage */
1294 	nvlist_free(rfs4_dss_oldpaths);
1295 	nvlist_free(rfs4_dss_paths);
1296 	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1297 }
1298 
1299 /*
1300  * Used to initialize the per zone NFSv4 server's state
1301  */
1302 void
rfs4_state_zone_init(nfs4_srv_t * nsrv4)1303 rfs4_state_zone_init(nfs4_srv_t *nsrv4)
1304 {
1305 	time_t start_time;
1306 	int start_grace;
1307 	char *dss_path = NFS4_DSS_VAR_DIR;
1308 
1309 	/* DSS: distributed stable storage: initialise served paths list */
1310 	nsrv4->dss_pathlist = NULL;
1311 
1312 	/*
1313 	 * Set the boot time.  If the server
1314 	 * has been restarted quickly and has had the opportunity to
1315 	 * service clients, then the start_time needs to be bumped
1316 	 * regardless.  A small window but it exists...
1317 	 */
1318 	start_time = gethrestime_sec();
1319 	if (nsrv4->rfs4_start_time < start_time)
1320 		nsrv4->rfs4_start_time = start_time;
1321 	else
1322 		nsrv4->rfs4_start_time++;
1323 
1324 	/*
1325 	 * Create the first server instance, or a new one if the server has
1326 	 * been restarted; see above comments on rfs4_start_time. Don't
1327 	 * start its grace period; that will be done later, to maximise the
1328 	 * clients' recovery window.
1329 	 */
1330 	start_grace = 0;
1331 	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
1332 		int i;
1333 		char **dss_allpaths = NULL;
1334 		dss_allpaths = kmem_alloc(sizeof (char *) *
1335 		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
1336 		/*
1337 		 * Add the default path into the list of paths for saving
1338 		 * state informantion.
1339 		 */
1340 		dss_allpaths[0] = dss_path;
1341 		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
1342 			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
1343 		}
1344 		rfs4_servinst_create(nsrv4, start_grace,
1345 		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
1346 		kmem_free(dss_allpaths,
1347 		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
1348 	} else {
1349 		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
1350 	}
1351 
1352 	/* reset the "first NFSv4 request" status */
1353 	nsrv4->seen_first_compound = 0;
1354 
1355 	mutex_enter(&nsrv4->state_lock);
1356 
1357 	/*
1358 	 * If the server state database has already been initialized,
1359 	 * skip it
1360 	 */
1361 	if (nsrv4->nfs4_server_state != NULL) {
1362 		mutex_exit(&nsrv4->state_lock);
1363 		return;
1364 	}
1365 
1366 	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);
1367 
1368 	/* set the various cache timers for table creation */
1369 	if (nsrv4->rfs4_client_cache_time == 0)
1370 		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
1371 	if (nsrv4->rfs4_openowner_cache_time == 0)
1372 		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
1373 	if (nsrv4->rfs4_state_cache_time == 0)
1374 		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
1375 	if (nsrv4->rfs4_lo_state_cache_time == 0)
1376 		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
1377 	if (nsrv4->rfs4_lockowner_cache_time == 0)
1378 		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
1379 	if (nsrv4->rfs4_file_cache_time == 0)
1380 		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
1381 	if (nsrv4->rfs4_deleg_state_cache_time == 0)
1382 		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;
1383 
1384 	/* Create the overall database to hold all server state */
1385 	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);
1386 
1387 	/* Now create the individual tables */
1388 	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
1389 	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1390 	    "Client",
1391 	    nsrv4->rfs4_client_cache_time,
1392 	    2,
1393 	    rfs4_client_create,
1394 	    rfs4_client_destroy,
1395 	    rfs4_client_expiry,
1396 	    sizeof (rfs4_client_t),
1397 	    TABSIZE,
1398 	    MAXTABSZ/8, 100);
1399 	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1400 	    "nfs_client_id4", nfsclnt_hash,
1401 	    nfsclnt_compare, nfsclnt_mkkey,
1402 	    TRUE);
1403 	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
1404 	    "client_id", clientid_hash,
1405 	    clientid_compare, clientid_mkkey,
1406 	    FALSE);
1407 
1408 	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
1409 	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1410 	    "ClntIP",
1411 	    nsrv4->rfs4_clntip_cache_time,
1412 	    1,
1413 	    rfs4_clntip_create,
1414 	    rfs4_clntip_destroy,
1415 	    rfs4_clntip_expiry,
1416 	    sizeof (rfs4_clntip_t),
1417 	    TABSIZE,
1418 	    MAXTABSZ, 100);
1419 	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
1420 	    "client_ip", clntip_hash,
1421 	    clntip_compare, clntip_mkkey,
1422 	    TRUE);
1423 
1424 	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
1425 	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1426 	    "OpenOwner",
1427 	    nsrv4->rfs4_openowner_cache_time,
1428 	    1,
1429 	    rfs4_openowner_create,
1430 	    rfs4_openowner_destroy,
1431 	    rfs4_openowner_expiry,
1432 	    sizeof (rfs4_openowner_t),
1433 	    TABSIZE,
1434 	    MAXTABSZ, 100);
1435 	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
1436 	    "open_owner4", openowner_hash,
1437 	    openowner_compare,
1438 	    openowner_mkkey, TRUE);
1439 
1440 	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
1441 	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1442 	    "OpenStateID",
1443 	    nsrv4->rfs4_state_cache_time,
1444 	    3,
1445 	    rfs4_state_create,
1446 	    rfs4_state_destroy,
1447 	    rfs4_state_expiry,
1448 	    sizeof (rfs4_state_t),
1449 	    TABSIZE,
1450 	    MAXTABSZ, 100);
1451 
1452 	/* CSTYLED */
1453 	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1454 	    "Openowner-File",
1455 	    state_owner_file_hash,
1456 	    state_owner_file_compare,
1457 	    state_owner_file_mkkey, TRUE);
1458 
1459 	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1460 	    "State-id", state_hash,
1461 	    state_compare, state_mkkey, FALSE);
1462 
1463 	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
1464 	    "File", state_file_hash,
1465 	    state_file_compare, state_file_mkkey,
1466 	    FALSE);
1467 
1468 	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
1469 	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1470 	    "LockStateID",
1471 	    nsrv4->rfs4_lo_state_cache_time,
1472 	    2,
1473 	    rfs4_lo_state_create,
1474 	    rfs4_lo_state_destroy,
1475 	    rfs4_lo_state_expiry,
1476 	    sizeof (rfs4_lo_state_t),
1477 	    TABSIZE,
1478 	    MAXTABSZ, 100);
1479 
1480 	/* CSTYLED */
1481 	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1482 	    "lockownerxstate",
1483 	    lo_state_lo_hash,
1484 	    lo_state_lo_compare,
1485 	    lo_state_lo_mkkey, TRUE);
1486 
1487 	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
1488 	    "State-id",
1489 	    lo_state_hash, lo_state_compare,
1490 	    lo_state_mkkey, FALSE);
1491 
1492 	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;
1493 
1494 	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1495 	    "Lockowner",
1496 	    nsrv4->rfs4_lockowner_cache_time,
1497 	    2,
1498 	    rfs4_lockowner_create,
1499 	    rfs4_lockowner_destroy,
1500 	    rfs4_lockowner_expiry,
1501 	    sizeof (rfs4_lockowner_t),
1502 	    TABSIZE,
1503 	    MAXTABSZ, 100);
1504 
1505 	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1506 	    "lock_owner4", lockowner_hash,
1507 	    lockowner_compare,
1508 	    lockowner_mkkey, TRUE);
1509 
1510 	/* CSTYLED */
1511 	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
1512 	    "pid", pid_hash,
1513 	    pid_compare, pid_mkkey,
1514 	    FALSE);
1515 
1516 	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
1517 	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1518 	    "File",
1519 	    nsrv4->rfs4_file_cache_time,
1520 	    1,
1521 	    rfs4_file_create,
1522 	    rfs4_file_destroy,
1523 	    NULL,
1524 	    sizeof (rfs4_file_t),
1525 	    TABSIZE,
1526 	    MAXTABSZ, -1);
1527 
1528 	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
1529 	    "Filehandle", file_hash,
1530 	    file_compare, file_mkkey, TRUE);
1531 
1532 	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
1533 	/* CSTYLED */
1534 	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
1535 	    "DelegStateID",
1536 	    nsrv4->rfs4_deleg_state_cache_time,
1537 	    2,
1538 	    rfs4_deleg_state_create,
1539 	    rfs4_deleg_state_destroy,
1540 	    rfs4_deleg_state_expiry,
1541 	    sizeof (rfs4_deleg_state_t),
1542 	    TABSIZE,
1543 	    MAXTABSZ, 100);
1544 	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1545 	    "DelegByFileClient",
1546 	    deleg_hash,
1547 	    deleg_compare,
1548 	    deleg_mkkey, TRUE);
1549 
1550 	/* CSTYLED */
1551 	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
1552 	    "DelegState",
1553 	    deleg_state_hash,
1554 	    deleg_state_compare,
1555 	    deleg_state_mkkey, FALSE);
1556 
1557 	rfs4x_state_init_locked(nsrv4);
1558 
1559 	mutex_exit(&nsrv4->state_lock);
1560 
1561 	/*
1562 	 * Init the stable storage.
1563 	 */
1564 	rfs4_ss_init(nsrv4);
1565 }
1566 
1567 /*
1568  * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1569  * and state.
1570  */
1571 void
rfs4_state_zone_fini(void)1572 rfs4_state_zone_fini(void)
1573 {
1574 	rfs4_database_t *dbp;
1575 	nfs4_srv_t *nsrv4;
1576 	nsrv4 = nfs4_get_srv();
1577 
1578 	rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1579 
1580 	/*
1581 	 * Clean up any dangling stable storage structures BEFORE calling
1582 	 * rfs4_servinst_destroy_all() so there are no dangling structures
1583 	 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1584 	 * freed).
1585 	 */
1586 	rfs4_ss_fini(nsrv4);
1587 
1588 	mutex_enter(&nsrv4->state_lock);
1589 
1590 	if (nsrv4->nfs4_server_state == NULL) {
1591 		mutex_exit(&nsrv4->state_lock);
1592 		return;
1593 	}
1594 
1595 	rfs4x_state_fini(nsrv4);
1596 
1597 	/* destroy server instances and current instance ptr */
1598 	rfs4_servinst_destroy_all(nsrv4);
1599 
1600 	/* reset the "first NFSv4 request" status */
1601 	nsrv4->seen_first_compound = 0;
1602 
1603 	dbp = nsrv4->nfs4_server_state;
1604 	nsrv4->nfs4_server_state = NULL;
1605 
1606 	rw_destroy(&nsrv4->rfs4_findclient_lock);
1607 
1608 	/* First stop all of the reaper threads in the database */
1609 	rfs4_database_shutdown(dbp);
1610 
1611 	/*
1612 	 * WARNING: There may be consumers of the rfs4 database still
1613 	 * active as we destroy these.  IF that's the case, consider putting
1614 	 * some of their _zone_fini()-like functions into the zsd key as
1615 	 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
1616 	 * maintain some ordering guarantees better that way.
1617 	 */
1618 	/* Now destroy/release the database tables */
1619 	rfs4_database_destroy(dbp);
1620 
1621 	/* Reset the cache timers for next time */
1622 	nsrv4->rfs4_client_cache_time = 0;
1623 	nsrv4->rfs4_openowner_cache_time = 0;
1624 	nsrv4->rfs4_state_cache_time = 0;
1625 	nsrv4->rfs4_lo_state_cache_time = 0;
1626 	nsrv4->rfs4_lockowner_cache_time = 0;
1627 	nsrv4->rfs4_file_cache_time = 0;
1628 	nsrv4->rfs4_deleg_state_cache_time = 0;
1629 
1630 	mutex_exit(&nsrv4->state_lock);
1631 }
1632 
/*
 * Server-side view of a clientid4.  The same storage can be read either
 * as the opaque protocol value (id4) or as the two 32-bit fields this
 * implementation packs into it.
 */
typedef union {
	struct {
		/* filled from the server's rfs4_start_time at creation */
		uint32_t start_time;
		/* database entry id of the client; also the hash value */
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1640 
/*
 * Forward declarations of helpers defined elsewhere in this file.
 * foreign_clientid() and embed_nodeid() are only invoked below when
 * CLUSTER_BOOTED is set in cluster_bootflags.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);
1644 
/*
 * Server-side view of the SETCLIENTID_CONFIRM verifier: the opaque
 * verifier4 overlaid with the two 32-bit fields used to generate it.
 */
typedef union {
	struct {
		/* copied from the client's cid c_id when it is created */
		uint32_t c_id;
		/* starts at 0; incremented by rfs4_client_scv_next() */
		uint32_t gen_num;
	} cv_impl;
	verifier4	confirm_verf;
} scid_confirm_verf;
1652 
1653 static uint32_t
clientid_hash(void * key)1654 clientid_hash(void *key)
1655 {
1656 	cid *idp = key;
1657 
1658 	return (idp->impl_id.c_id);
1659 }
1660 
1661 static bool_t
clientid_compare(rfs4_entry_t entry,void * key)1662 clientid_compare(rfs4_entry_t entry, void *key)
1663 {
1664 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1665 	clientid4 *idp = key;
1666 
1667 	return (*idp == cp->rc_clientid);
1668 }
1669 
1670 static void *
clientid_mkkey(rfs4_entry_t entry)1671 clientid_mkkey(rfs4_entry_t entry)
1672 {
1673 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1674 
1675 	return (&cp->rc_clientid);
1676 }
1677 
1678 static uint32_t
nfsclnt_hash(void * key)1679 nfsclnt_hash(void *key)
1680 {
1681 	nfs_client_id4 *client = key;
1682 	int i;
1683 	uint32_t hash = 0;
1684 
1685 	for (i = 0; i < client->id_len; i++) {
1686 		hash <<= 1;
1687 		hash += (uint_t)client->id_val[i];
1688 	}
1689 	return (hash);
1690 }
1691 
1692 
1693 static bool_t
nfsclnt_compare(rfs4_entry_t entry,void * key)1694 nfsclnt_compare(rfs4_entry_t entry, void *key)
1695 {
1696 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1697 	nfs_client_id4 *nfs_client = key;
1698 
1699 	if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1700 		return (FALSE);
1701 
1702 	return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1703 	    nfs_client->id_len) == 0);
1704 }
1705 
1706 static void *
nfsclnt_mkkey(rfs4_entry_t entry)1707 nfsclnt_mkkey(rfs4_entry_t entry)
1708 {
1709 	rfs4_client_t *cp = (rfs4_client_t *)entry;
1710 
1711 	return (&cp->rc_nfs_client);
1712 }
1713 
1714 static bool_t
rfs4_client_expiry(rfs4_entry_t u_entry)1715 rfs4_client_expiry(rfs4_entry_t u_entry)
1716 {
1717 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1718 	bool_t cp_expired;
1719 
1720 	if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1721 		cp->rc_ss_remove = 1;
1722 		return (TRUE);
1723 	}
1724 	/*
1725 	 * If the sysadmin has used clear_locks for this
1726 	 * entry then forced_expire will be set and we
1727 	 * want this entry to be reaped. Or the entry
1728 	 * has exceeded its lease period.
1729 	 */
1730 	cp_expired = (cp->rc_forced_expire ||
1731 	    (gethrestime_sec() - cp->rc_last_access
1732 	    > rfs4_lease_time));
1733 
1734 	if (!cp->rc_ss_remove && cp_expired)
1735 		cp->rc_ss_remove = 1;
1736 	return (cp_expired);
1737 }
1738 
1739 /*
1740  * Remove the leaf file from all distributed stable storage paths.
1741  */
1742 static void
rfs4_dss_remove_cpleaf(rfs4_client_t * cp)1743 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1744 {
1745 	nfs4_srv_t *nsrv4;
1746 	rfs4_servinst_t *sip;
1747 	char *leaf = cp->rc_ss_pn->leaf;
1748 
1749 	/*
1750 	 * since the state files are written to all DSS
1751 	 * paths we must remove this leaf file instance
1752 	 * from all server instances.
1753 	 */
1754 
1755 	nsrv4 = nfs4_get_srv();
1756 	mutex_enter(&nsrv4->servinst_lock);
1757 	for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1758 		/* remove the leaf file associated with this server instance */
1759 		rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1760 	}
1761 	mutex_exit(&nsrv4->servinst_lock);
1762 }
1763 
1764 static void
rfs4_dss_remove_leaf(rfs4_servinst_t * sip,char * dir_leaf,char * leaf)1765 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1766 {
1767 	int i, npaths = sip->dss_npaths;
1768 
1769 	for (i = 0; i < npaths; i++) {
1770 		rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1771 		char *path, *dir;
1772 		size_t pathlen;
1773 
1774 		/* the HA-NFSv4 path might have been failed-over away from us */
1775 		if (dss_path == NULL)
1776 			continue;
1777 
1778 		dir = dss_path->path;
1779 
1780 		/* allow 3 extra bytes for two '/' & a NUL */
1781 		pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1782 		path = kmem_alloc(pathlen, KM_SLEEP);
1783 		(void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1784 
1785 		(void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1786 
1787 		kmem_free(path, pathlen);
1788 	}
1789 }
1790 
1791 static void
rfs4_client_destroy(rfs4_entry_t u_entry)1792 rfs4_client_destroy(rfs4_entry_t u_entry)
1793 {
1794 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1795 
1796 	mutex_destroy(cp->rc_cbinfo.cb_lock);
1797 	cv_destroy(cp->rc_cbinfo.cb_cv);
1798 	cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1799 	list_destroy(&cp->rc_openownerlist);
1800 
1801 	list_destroy(&cp->rc_sessions);
1802 
1803 	/* free callback info */
1804 	rfs4_cbinfo_free(&cp->rc_cbinfo);
1805 
1806 	if (cp->rc_cp_confirmed)
1807 		rfs4_client_rele(cp->rc_cp_confirmed);
1808 
1809 	if (cp->rc_ss_pn) {
1810 		/* check if the stable storage files need to be removed */
1811 		if (cp->rc_ss_remove)
1812 			rfs4_dss_remove_cpleaf(cp);
1813 		rfs4_ss_pnfree(cp->rc_ss_pn);
1814 	}
1815 
1816 	/* Free the client supplied client id */
1817 	kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1818 
1819 	if (cp->rc_sysidt != LM_NOSYSID)
1820 		lm_free_sysidt(cp->rc_sysidt);
1821 
1822 	rfs4_free_cred_set(&cp->rc_cr_set);
1823 }
1824 
1825 static bool_t
rfs4_client_create(rfs4_entry_t u_entry,void * arg)1826 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1827 {
1828 	rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1829 	nfs_client_id4 *client = (nfs_client_id4 *)arg;
1830 	struct sockaddr *ca;
1831 	cid *cidp;
1832 	scid_confirm_verf *scvp;
1833 	nfs4_srv_t *nsrv4;
1834 
1835 	nsrv4 = nfs4_get_srv();
1836 
1837 	/* Get a clientid to give to the client */
1838 	cidp = (cid *)&cp->rc_clientid;
1839 	cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1840 	cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1841 
1842 	/* If we are booted as a cluster node, embed our nodeid */
1843 	if (cluster_bootflags & CLUSTER_BOOTED)
1844 		embed_nodeid(cidp);
1845 
1846 	/* Allocate and copy client's client id value */
1847 	cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1848 	cp->rc_nfs_client.id_len = client->id_len;
1849 	bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1850 	cp->rc_nfs_client.verifier = client->verifier;
1851 
1852 	/* Copy client's IP address */
1853 	ca = client->cl_addr;
1854 	if (ca->sa_family == AF_INET)
1855 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1856 	else if (ca->sa_family == AF_INET6)
1857 		bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1858 	cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1859 
1860 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1861 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1862 	scvp->cv_impl.c_id = cidp->impl_id.c_id;
1863 	scvp->cv_impl.gen_num = 0;
1864 
1865 	/* An F_UNLKSYS has been done for this client */
1866 	cp->rc_unlksys_completed = FALSE;
1867 
1868 	/* We need the client to ack us */
1869 	cp->rc_need_confirm = TRUE;
1870 	cp->rc_cp_confirmed = NULL;
1871 	cp->rc_destroying = FALSE;
1872 
1873 	/* TRUE all the time until the callback path actually fails */
1874 	cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1875 
1876 	/* Initialize the access time to now */
1877 	cp->rc_last_access = gethrestime_sec();
1878 
1879 	bzero(&cp->rc_cr_set, sizeof (cred_set_t));
1880 
1881 	cp->rc_sysidt = LM_NOSYSID;
1882 
1883 	list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1884 	    offsetof(rfs4_openowner_t, ro_node));
1885 
1886 	list_create(&cp->rc_sessions, sizeof (rfs4_session_t),
1887 	    offsetof(rfs4_session_t, sn_node));
1888 
1889 	/* set up the callback control structure */
1890 	cp->rc_cbinfo.cb_state = CB_UNINIT;
1891 	mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1892 	cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1893 	cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1894 
1895 	/*
1896 	 * Associate the client_t with the current server instance.
1897 	 * The hold is solely to satisfy the calling requirement of
1898 	 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1899 	 */
1900 	rfs4_dbe_hold(cp->rc_dbe);
1901 	rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1902 	rfs4_dbe_rele(cp->rc_dbe);
1903 
1904 	/*
1905 	 * NFSv4.1: See rfc8881, Section 18.36.4, eir_sequenceid
1906 	 * "Before the server replies to that EXCHANGE_ID
1907 	 * operation, it initializes the client ID slot to be equal to
1908 	 * eir_sequenceid - 1 (accounting for underflow), and records a
1909 	 * contrived CREATE_SESSION result with a "cached" result of
1910 	 * NFS4ERR_SEQ_MISORDERED."
1911 	 */
1912 	cp->rc_contrived.xi_sid = 1;
1913 	cp->rc_contrived.cs_status = NFS4ERR_SEQ_MISORDERED;
1914 
1915 	return (TRUE);
1916 }
1917 
1918 /*
1919  * Caller wants to generate/update the setclientid_confirm verifier
1920  * associated with a client.  This is done during the SETCLIENTID
1921  * processing.
1922  */
1923 void
rfs4_client_scv_next(rfs4_client_t * cp)1924 rfs4_client_scv_next(rfs4_client_t *cp)
1925 {
1926 	scid_confirm_verf *scvp;
1927 
1928 	/* Init the value for the SETCLIENTID_CONFIRM verifier */
1929 	scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1930 	scvp->cv_impl.gen_num++;
1931 }
1932 
1933 void
rfs4_client_rele(rfs4_client_t * cp)1934 rfs4_client_rele(rfs4_client_t *cp)
1935 {
1936 	rfs4_dbe_rele(cp->rc_dbe);
1937 }
1938 
1939 rfs4_client_t *
rfs4_findclient(nfs_client_id4 * client,bool_t * create,rfs4_client_t * oldcp)1940 rfs4_findclient(nfs_client_id4 *client, bool_t *create,	rfs4_client_t *oldcp)
1941 {
1942 	rfs4_client_t *cp;
1943 	nfs4_srv_t *nsrv4;
1944 	nsrv4 = nfs4_get_srv();
1945 
1946 
1947 	if (oldcp) {
1948 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1949 		rfs4_dbe_hide(oldcp->rc_dbe);
1950 	} else {
1951 		rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1952 	}
1953 
1954 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1955 	    create, (void *)client, RFS4_DBS_VALID);
1956 
1957 	if (oldcp)
1958 		rfs4_dbe_unhide(oldcp->rc_dbe);
1959 
1960 	rw_exit(&nsrv4->rfs4_findclient_lock);
1961 
1962 	return (cp);
1963 }
1964 
1965 rfs4_client_t *
rfs4_findclient_by_id(clientid4 clientid,bool_t find_unconfirmed)1966 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1967 {
1968 	rfs4_client_t *cp;
1969 	bool_t create = FALSE;
1970 	cid *cidp = (cid *)&clientid;
1971 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
1972 
1973 	/* If we're a cluster and the nodeid isn't right, short-circuit */
1974 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1975 		return (NULL);
1976 
1977 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1978 
1979 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1980 	    &create, NULL, RFS4_DBS_VALID);
1981 
1982 	rw_exit(&nsrv4->rfs4_findclient_lock);
1983 
1984 	if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1985 		rfs4_client_rele(cp);
1986 		return (NULL);
1987 	} else {
1988 		return (cp);
1989 	}
1990 }
1991 
1992 static uint32_t
clntip_hash(void * key)1993 clntip_hash(void *key)
1994 {
1995 	struct sockaddr *addr = key;
1996 	int i, len = 0;
1997 	uint32_t hash = 0;
1998 	char *ptr;
1999 
2000 	if (addr->sa_family == AF_INET) {
2001 		struct sockaddr_in *a = (struct sockaddr_in *)addr;
2002 		len = sizeof (struct in_addr);
2003 		ptr = (char *)&a->sin_addr;
2004 	} else if (addr->sa_family == AF_INET6) {
2005 		struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
2006 		len = sizeof (struct in6_addr);
2007 		ptr = (char *)&a->sin6_addr;
2008 	} else
2009 		return (0);
2010 
2011 	for (i = 0; i < len; i++) {
2012 		hash <<= 1;
2013 		hash += (uint_t)ptr[i];
2014 	}
2015 	return (hash);
2016 }
2017 
2018 static bool_t
clntip_compare(rfs4_entry_t entry,void * key)2019 clntip_compare(rfs4_entry_t entry, void *key)
2020 {
2021 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2022 	struct sockaddr *addr = key;
2023 	int len = 0;
2024 	char *p1, *p2;
2025 
2026 	if (addr->sa_family == AF_INET) {
2027 		struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
2028 		struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
2029 		len = sizeof (struct in_addr);
2030 		p1 = (char *)&a1->sin_addr;
2031 		p2 = (char *)&a2->sin_addr;
2032 	} else if (addr->sa_family == AF_INET6) {
2033 		struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
2034 		struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
2035 		len = sizeof (struct in6_addr);
2036 		p1 = (char *)&a1->sin6_addr;
2037 		p2 = (char *)&a2->sin6_addr;
2038 	} else
2039 		return (0);
2040 
2041 	return (bcmp(p1, p2, len) == 0);
2042 }
2043 
2044 static void *
clntip_mkkey(rfs4_entry_t entry)2045 clntip_mkkey(rfs4_entry_t entry)
2046 {
2047 	rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2048 
2049 	return (&cp->ri_addr);
2050 }
2051 
2052 static bool_t
rfs4_clntip_expiry(rfs4_entry_t u_entry)2053 rfs4_clntip_expiry(rfs4_entry_t u_entry)
2054 {
2055 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2056 
2057 	if (rfs4_dbe_is_invalid(cp->ri_dbe))
2058 		return (TRUE);
2059 	return (FALSE);
2060 }
2061 
2062 /* ARGSUSED */
2063 static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)2064 rfs4_clntip_destroy(rfs4_entry_t u_entry)
2065 {
2066 }
2067 
2068 static bool_t
rfs4_clntip_create(rfs4_entry_t u_entry,void * arg)2069 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
2070 {
2071 	rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2072 	struct sockaddr *ca = (struct sockaddr *)arg;
2073 
2074 	/* Copy client's IP address */
2075 	if (ca->sa_family == AF_INET)
2076 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
2077 	else if (ca->sa_family == AF_INET6)
2078 		bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
2079 	else
2080 		return (FALSE);
2081 	cp->ri_no_referrals = 1;
2082 
2083 	return (TRUE);
2084 }
2085 
2086 rfs4_clntip_t *
rfs4_find_clntip(struct sockaddr * addr,bool_t * create)2087 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
2088 {
2089 	rfs4_clntip_t *cp;
2090 	nfs4_srv_t *nsrv4;
2091 
2092 	nsrv4 = nfs4_get_srv();
2093 
2094 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2095 
2096 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2097 	    create, addr, RFS4_DBS_VALID);
2098 
2099 	rw_exit(&nsrv4->rfs4_findclient_lock);
2100 
2101 	return (cp);
2102 }
2103 
2104 void
rfs4_invalidate_clntip(struct sockaddr * addr)2105 rfs4_invalidate_clntip(struct sockaddr *addr)
2106 {
2107 	rfs4_clntip_t *cp;
2108 	bool_t create = FALSE;
2109 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2110 
2111 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2112 
2113 	cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2114 	    &create, NULL, RFS4_DBS_VALID);
2115 	if (cp == NULL) {
2116 		rw_exit(&nsrv4->rfs4_findclient_lock);
2117 		return;
2118 	}
2119 	rfs4_dbe_invalidate(cp->ri_dbe);
2120 	rfs4_dbe_rele(cp->ri_dbe);
2121 
2122 	rw_exit(&nsrv4->rfs4_findclient_lock);
2123 }
2124 
2125 bool_t
rfs4_lease_expired(rfs4_client_t * cp)2126 rfs4_lease_expired(rfs4_client_t *cp)
2127 {
2128 	bool_t rc;
2129 
2130 	rfs4_dbe_lock(cp->rc_dbe);
2131 
2132 	/*
2133 	 * If the admin has executed clear_locks for this
2134 	 * client id, force expire will be set, so no need
2135 	 * to calculate anything because it's "outa here".
2136 	 */
2137 	if (cp->rc_forced_expire) {
2138 		rc = TRUE;
2139 	} else {
2140 		rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2141 	}
2142 
2143 	/*
2144 	 * If the lease has expired we will also want
2145 	 * to remove any stable storage state data. So
2146 	 * mark the client id accordingly.
2147 	 */
2148 	if (!cp->rc_ss_remove)
2149 		cp->rc_ss_remove = (rc == TRUE);
2150 
2151 	rfs4_dbe_unlock(cp->rc_dbe);
2152 
2153 	return (rc);
2154 }
2155 
2156 void
rfs4_update_lease(rfs4_client_t * cp)2157 rfs4_update_lease(rfs4_client_t *cp)
2158 {
2159 	rfs4_dbe_lock(cp->rc_dbe);
2160 	if (!cp->rc_forced_expire)
2161 		cp->rc_last_access = gethrestime_sec();
2162 	rfs4_dbe_unlock(cp->rc_dbe);
2163 }
2164 
2165 
2166 static bool_t
EQOPENOWNER(open_owner4 * a,open_owner4 * b)2167 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2168 {
2169 	bool_t rc;
2170 
2171 	if (a->clientid != b->clientid)
2172 		return (FALSE);
2173 
2174 	if (a->owner_len != b->owner_len)
2175 		return (FALSE);
2176 
2177 	rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2178 
2179 	return (rc);
2180 }
2181 
2182 static uint_t
openowner_hash(void * key)2183 openowner_hash(void *key)
2184 {
2185 	int i;
2186 	open_owner4 *openowner = key;
2187 	uint_t hash = 0;
2188 
2189 	for (i = 0; i < openowner->owner_len; i++) {
2190 		hash <<= 4;
2191 		hash += (uint_t)openowner->owner_val[i];
2192 	}
2193 	hash += (uint_t)openowner->clientid;
2194 	hash |= (openowner->clientid >> 32);
2195 
2196 	return (hash);
2197 }
2198 
2199 static bool_t
openowner_compare(rfs4_entry_t u_entry,void * key)2200 openowner_compare(rfs4_entry_t u_entry, void *key)
2201 {
2202 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2203 	open_owner4 *arg = key;
2204 
2205 	return (EQOPENOWNER(&oo->ro_owner, arg));
2206 }
2207 
2208 void *
openowner_mkkey(rfs4_entry_t u_entry)2209 openowner_mkkey(rfs4_entry_t u_entry)
2210 {
2211 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2212 
2213 	return (&oo->ro_owner);
2214 }
2215 
2216 /* ARGSUSED */
2217 static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)2218 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2219 {
2220 	/* openstateid held us and did all needed delay */
2221 	return (TRUE);
2222 }
2223 
2224 static void
rfs4_openowner_destroy(rfs4_entry_t u_entry)2225 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2226 {
2227 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2228 
2229 	/* Remove open owner from client's lists of open owners */
2230 	rfs4_dbe_lock(oo->ro_client->rc_dbe);
2231 	list_remove(&oo->ro_client->rc_openownerlist, oo);
2232 	rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2233 
2234 	/* One less reference to the client */
2235 	rfs4_client_rele(oo->ro_client);
2236 	oo->ro_client = NULL;
2237 
2238 	/* Free the last reply for this lock owner */
2239 	rfs4_free_reply(&oo->ro_reply);
2240 
2241 	if (oo->ro_reply_fh.nfs_fh4_val) {
2242 		kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2243 		    oo->ro_reply_fh.nfs_fh4_len);
2244 		oo->ro_reply_fh.nfs_fh4_val = NULL;
2245 		oo->ro_reply_fh.nfs_fh4_len = 0;
2246 	}
2247 
2248 	rfs4_sw_destroy(&oo->ro_sw);
2249 	list_destroy(&oo->ro_statelist);
2250 
2251 	/* Free the lock owner id */
2252 	kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2253 }
2254 
2255 void
rfs4_openowner_rele(rfs4_openowner_t * oo)2256 rfs4_openowner_rele(rfs4_openowner_t *oo)
2257 {
2258 	rfs4_dbe_rele(oo->ro_dbe);
2259 }
2260 
2261 static bool_t
rfs4_openowner_create(rfs4_entry_t u_entry,void * arg)2262 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2263 {
2264 	rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2265 	rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2266 	open_owner4 *openowner = &argp->ro_owner;
2267 	seqid4 seqid = argp->ro_open_seqid;
2268 	rfs4_client_t *cp;
2269 	bool_t create = FALSE;
2270 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2271 
2272 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2273 
2274 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2275 	    &openowner->clientid,
2276 	    &create, NULL, RFS4_DBS_VALID);
2277 
2278 	rw_exit(&nsrv4->rfs4_findclient_lock);
2279 
2280 	if (cp == NULL)
2281 		return (FALSE);
2282 
2283 	oo->ro_reply_fh.nfs_fh4_len = 0;
2284 	oo->ro_reply_fh.nfs_fh4_val = NULL;
2285 
2286 	oo->ro_owner.clientid = openowner->clientid;
2287 	oo->ro_owner.owner_val =
2288 	    kmem_alloc(openowner->owner_len, KM_SLEEP);
2289 
2290 	bcopy(openowner->owner_val,
2291 	    oo->ro_owner.owner_val, openowner->owner_len);
2292 
2293 	oo->ro_owner.owner_len = openowner->owner_len;
2294 
2295 	oo->ro_need_confirm = TRUE;
2296 
2297 	rfs4_sw_init(&oo->ro_sw);
2298 
2299 	oo->ro_open_seqid = seqid;
2300 	bzero(&oo->ro_reply, sizeof (nfs_resop4));
2301 	oo->ro_client = cp;
2302 
2303 	list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2304 	    offsetof(rfs4_state_t, rs_node));
2305 
2306 	/* Insert openowner into client's open owner list */
2307 	rfs4_dbe_lock(cp->rc_dbe);
2308 	list_insert_tail(&cp->rc_openownerlist, oo);
2309 	rfs4_dbe_unlock(cp->rc_dbe);
2310 
2311 	return (TRUE);
2312 }
2313 
2314 rfs4_openowner_t *
rfs4_findopenowner(open_owner4 * openowner,bool_t * create,seqid4 seqid)2315 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2316 {
2317 	rfs4_openowner_t *oo;
2318 	rfs4_openowner_t arg;
2319 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2320 
2321 	arg.ro_owner = *openowner;
2322 	arg.ro_open_seqid = seqid;
2323 	/* CSTYLED */
2324 	oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2325 	    create, &arg, RFS4_DBS_VALID);
2326 
2327 	return (oo);
2328 }
2329 
2330 void
rfs4_update_open_sequence(rfs4_openowner_t * oo)2331 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2332 {
2333 
2334 	rfs4_dbe_lock(oo->ro_dbe);
2335 
2336 	oo->ro_open_seqid++;
2337 
2338 	rfs4_dbe_unlock(oo->ro_dbe);
2339 }
2340 
2341 void
rfs4_update_open_resp(rfs4_openowner_t * oo,nfs_resop4 * resp,nfs_fh4 * fh)2342 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2343 {
2344 
2345 	rfs4_dbe_lock(oo->ro_dbe);
2346 
2347 	rfs4_free_reply(&oo->ro_reply);
2348 
2349 	rfs4_copy_reply(&oo->ro_reply, resp);
2350 
2351 	/* Save the filehandle if provided and free if not used */
2352 	if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2353 	    fh && fh->nfs_fh4_len) {
2354 		if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2355 			oo->ro_reply_fh.nfs_fh4_val =
2356 			    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2357 		nfs_fh4_copy(fh, &oo->ro_reply_fh);
2358 	} else {
2359 		if (oo->ro_reply_fh.nfs_fh4_val) {
2360 			kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2361 			    oo->ro_reply_fh.nfs_fh4_len);
2362 			oo->ro_reply_fh.nfs_fh4_val = NULL;
2363 			oo->ro_reply_fh.nfs_fh4_len = 0;
2364 		}
2365 	}
2366 
2367 	rfs4_dbe_unlock(oo->ro_dbe);
2368 }
2369 
2370 static bool_t
lockowner_compare(rfs4_entry_t u_entry,void * key)2371 lockowner_compare(rfs4_entry_t u_entry, void *key)
2372 {
2373 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2374 	lock_owner4 *b = (lock_owner4 *)key;
2375 
2376 	if (lo->rl_owner.clientid != b->clientid)
2377 		return (FALSE);
2378 
2379 	if (lo->rl_owner.owner_len != b->owner_len)
2380 		return (FALSE);
2381 
2382 	return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2383 	    lo->rl_owner.owner_len) == 0);
2384 }
2385 
2386 void *
lockowner_mkkey(rfs4_entry_t u_entry)2387 lockowner_mkkey(rfs4_entry_t u_entry)
2388 {
2389 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2390 
2391 	return (&lo->rl_owner);
2392 }
2393 
2394 static uint32_t
lockowner_hash(void * key)2395 lockowner_hash(void *key)
2396 {
2397 	int i;
2398 	lock_owner4 *lockowner = key;
2399 	uint_t hash = 0;
2400 
2401 	for (i = 0; i < lockowner->owner_len; i++) {
2402 		hash <<= 4;
2403 		hash += (uint_t)lockowner->owner_val[i];
2404 	}
2405 	hash += (uint_t)lockowner->clientid;
2406 	hash |= (lockowner->clientid >> 32);
2407 
2408 	return (hash);
2409 }
2410 
/*
 * Hash for pid-keyed lookups.  The key is not a real pointer: it is an
 * integer carried in a void *, and the hash is that integer truncated
 * to 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2416 
2417 static void *
pid_mkkey(rfs4_entry_t u_entry)2418 pid_mkkey(rfs4_entry_t u_entry)
2419 {
2420 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2421 
2422 	return ((void *)(uintptr_t)lo->rl_pid);
2423 }
2424 
2425 static bool_t
pid_compare(rfs4_entry_t u_entry,void * key)2426 pid_compare(rfs4_entry_t u_entry, void *key)
2427 {
2428 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2429 
2430 	return (lo->rl_pid == (pid_t)(uintptr_t)key);
2431 }
2432 
2433 static void
rfs4_lockowner_destroy(rfs4_entry_t u_entry)2434 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2435 {
2436 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2437 
2438 	/* Free the lock owner id */
2439 	kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2440 	rfs4_client_rele(lo->rl_client);
2441 }
2442 
2443 void
rfs4_lockowner_rele(rfs4_lockowner_t * lo)2444 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2445 {
2446 	rfs4_dbe_rele(lo->rl_dbe);
2447 }
2448 
2449 /* ARGSUSED */
2450 static bool_t
rfs4_lockowner_expiry(rfs4_entry_t u_entry)2451 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2452 {
2453 	/*
2454 	 * Since expiry is called with no other references on
2455 	 * this struct, go ahead and have it removed.
2456 	 */
2457 	return (TRUE);
2458 }
2459 
2460 static bool_t
rfs4_lockowner_create(rfs4_entry_t u_entry,void * arg)2461 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2462 {
2463 	rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2464 	lock_owner4 *lockowner = (lock_owner4 *)arg;
2465 	rfs4_client_t *cp;
2466 	bool_t create = FALSE;
2467 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2468 
2469 	rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2470 
2471 	cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2472 	    &lockowner->clientid,
2473 	    &create, NULL, RFS4_DBS_VALID);
2474 
2475 	rw_exit(&nsrv4->rfs4_findclient_lock);
2476 
2477 	if (cp == NULL)
2478 		return (FALSE);
2479 
2480 	/* Reference client */
2481 	lo->rl_client = cp;
2482 	lo->rl_owner.clientid = lockowner->clientid;
2483 	lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2484 	bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2485 	    lockowner->owner_len);
2486 	lo->rl_owner.owner_len = lockowner->owner_len;
2487 	lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2488 
2489 	return (TRUE);
2490 }
2491 
2492 rfs4_lockowner_t *
rfs4_findlockowner(lock_owner4 * lockowner,bool_t * create)2493 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2494 {
2495 	rfs4_lockowner_t *lo;
2496 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2497 
2498 	/* CSTYLED */
2499 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2500 	    create, lockowner, RFS4_DBS_VALID);
2501 
2502 	return (lo);
2503 }
2504 
2505 rfs4_lockowner_t *
rfs4_findlockowner_by_pid(pid_t pid)2506 rfs4_findlockowner_by_pid(pid_t pid)
2507 {
2508 	rfs4_lockowner_t *lo;
2509 	bool_t create = FALSE;
2510 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2511 
2512 	lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2513 	    (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2514 
2515 	return (lo);
2516 }
2517 
2518 
2519 static uint32_t
file_hash(void * key)2520 file_hash(void *key)
2521 {
2522 	return (ADDRHASH(key));
2523 }
2524 
2525 static void *
file_mkkey(rfs4_entry_t u_entry)2526 file_mkkey(rfs4_entry_t u_entry)
2527 {
2528 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2529 
2530 	return (fp->rf_vp);
2531 }
2532 
2533 static bool_t
file_compare(rfs4_entry_t u_entry,void * key)2534 file_compare(rfs4_entry_t u_entry, void *key)
2535 {
2536 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2537 
2538 	return (fp->rf_vp == (vnode_t *)key);
2539 }
2540 
2541 static void
rfs4_file_destroy(rfs4_entry_t u_entry)2542 rfs4_file_destroy(rfs4_entry_t u_entry)
2543 {
2544 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2545 
2546 	list_destroy(&fp->rf_delegstatelist);
2547 
2548 	if (fp->rf_filehandle.nfs_fh4_val)
2549 		kmem_free(fp->rf_filehandle.nfs_fh4_val,
2550 		    fp->rf_filehandle.nfs_fh4_len);
2551 	cv_destroy(fp->rf_dinfo.rd_recall_cv);
2552 	if (fp->rf_vp) {
2553 		vnode_t *vp = fp->rf_vp;
2554 
2555 		mutex_enter(&vp->v_vsd_lock);
2556 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
2557 		mutex_exit(&vp->v_vsd_lock);
2558 		VN_RELE(vp);
2559 		fp->rf_vp = NULL;
2560 	}
2561 	rw_destroy(&fp->rf_file_rwlock);
2562 }
2563 
2564 /*
2565  * Used to unlock the underlying dbe struct only
2566  */
2567 void
rfs4_file_rele(rfs4_file_t * fp)2568 rfs4_file_rele(rfs4_file_t *fp)
2569 {
2570 	rfs4_dbe_rele(fp->rf_dbe);
2571 }
2572 
2573 typedef struct {
2574     vnode_t *vp;
2575     nfs_fh4 *fh;
2576 } rfs4_fcreate_arg;
2577 
2578 static bool_t
rfs4_file_create(rfs4_entry_t u_entry,void * arg)2579 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2580 {
2581 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2582 	rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2583 	vnode_t *vp = ap->vp;
2584 	nfs_fh4 *fh = ap->fh;
2585 
2586 	VN_HOLD(vp);
2587 
2588 	fp->rf_filehandle.nfs_fh4_len = 0;
2589 	fp->rf_filehandle.nfs_fh4_val = NULL;
2590 	ASSERT(fh && fh->nfs_fh4_len);
2591 	if (fh && fh->nfs_fh4_len) {
2592 		fp->rf_filehandle.nfs_fh4_val =
2593 		    kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2594 		nfs_fh4_copy(fh, &fp->rf_filehandle);
2595 	}
2596 	fp->rf_vp = vp;
2597 
2598 	list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2599 	    offsetof(rfs4_deleg_state_t, rds_node));
2600 
2601 	fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2602 	fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2603 
2604 	mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2605 	cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2606 
2607 	fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2608 
2609 	rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2610 
2611 	mutex_enter(&vp->v_vsd_lock);
2612 	VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2613 	mutex_exit(&vp->v_vsd_lock);
2614 
2615 	return (TRUE);
2616 }
2617 
2618 rfs4_file_t *
rfs4_findfile(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2619 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2620 {
2621 	rfs4_file_t *fp;
2622 	rfs4_fcreate_arg arg;
2623 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2624 
2625 	arg.vp = vp;
2626 	arg.fh = fh;
2627 
2628 	if (*create == TRUE)
2629 		/* CSTYLED */
2630 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2631 		    &arg, RFS4_DBS_VALID);
2632 	else {
2633 		mutex_enter(&vp->v_vsd_lock);
2634 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2635 		if (fp) {
2636 			rfs4_dbe_lock(fp->rf_dbe);
2637 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2638 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2639 				rfs4_dbe_unlock(fp->rf_dbe);
2640 				fp = NULL;
2641 			} else {
2642 				rfs4_dbe_hold(fp->rf_dbe);
2643 				rfs4_dbe_unlock(fp->rf_dbe);
2644 			}
2645 		}
2646 		mutex_exit(&vp->v_vsd_lock);
2647 	}
2648 	return (fp);
2649 }
2650 
2651 /*
2652  * Find a file in the db and once it is located, take the rw lock.
2653  * Need to check the vnode pointer and if it does not exist (it was
2654  * removed between the db location and check) redo the find.  This
2655  * assumes that a file struct that has a NULL vnode pointer is marked
2656  * at 'invalid' and will not be found in the db the second time
2657  * around.
2658  */
2659 rfs4_file_t *
rfs4_findfile_withlock(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2660 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2661 {
2662 	rfs4_file_t *fp;
2663 	rfs4_fcreate_arg arg;
2664 	bool_t screate = *create;
2665 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2666 
2667 	if (screate == FALSE) {
2668 		mutex_enter(&vp->v_vsd_lock);
2669 		fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2670 		if (fp) {
2671 			rfs4_dbe_lock(fp->rf_dbe);
2672 			if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2673 			    (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2674 				rfs4_dbe_unlock(fp->rf_dbe);
2675 				mutex_exit(&vp->v_vsd_lock);
2676 				fp = NULL;
2677 			} else {
2678 				rfs4_dbe_hold(fp->rf_dbe);
2679 				rfs4_dbe_unlock(fp->rf_dbe);
2680 				mutex_exit(&vp->v_vsd_lock);
2681 				rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2682 				if (fp->rf_vp == NULL) {
2683 					rw_exit(&fp->rf_file_rwlock);
2684 					rfs4_file_rele(fp);
2685 					fp = NULL;
2686 				}
2687 			}
2688 		} else {
2689 			mutex_exit(&vp->v_vsd_lock);
2690 		}
2691 	} else {
2692 retry:
2693 		arg.vp = vp;
2694 		arg.fh = fh;
2695 
2696 		fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2697 		    create, &arg, RFS4_DBS_VALID);
2698 		if (fp != NULL) {
2699 			rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2700 			if (fp->rf_vp == NULL) {
2701 				rw_exit(&fp->rf_file_rwlock);
2702 				rfs4_file_rele(fp);
2703 				*create = screate;
2704 				goto retry;
2705 			}
2706 		}
2707 	}
2708 
2709 	return (fp);
2710 }
2711 
2712 static uint32_t
lo_state_hash(void * key)2713 lo_state_hash(void *key)
2714 {
2715 	stateid_t *id = key;
2716 
2717 	return (id->bits.ident+id->bits.pid);
2718 }
2719 
2720 static bool_t
lo_state_compare(rfs4_entry_t u_entry,void * key)2721 lo_state_compare(rfs4_entry_t u_entry, void *key)
2722 {
2723 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2724 	stateid_t *id = key;
2725 	bool_t rc;
2726 
2727 	rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2728 	    lsp->rls_lockid.bits.type == id->bits.type &&
2729 	    lsp->rls_lockid.bits.ident == id->bits.ident &&
2730 	    lsp->rls_lockid.bits.pid == id->bits.pid);
2731 
2732 	return (rc);
2733 }
2734 
2735 static void *
lo_state_mkkey(rfs4_entry_t u_entry)2736 lo_state_mkkey(rfs4_entry_t u_entry)
2737 {
2738 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2739 
2740 	return (&lsp->rls_lockid);
2741 }
2742 
2743 static bool_t
rfs4_lo_state_expiry(rfs4_entry_t u_entry)2744 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2745 {
2746 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2747 
2748 	if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2749 		return (TRUE);
2750 	if (lsp->rls_state->rs_closed)
2751 		return (TRUE);
2752 	return ((gethrestime_sec() -
2753 	    lsp->rls_state->rs_owner->ro_client->rc_last_access
2754 	    > rfs4_lease_time));
2755 }
2756 
2757 static void
rfs4_lo_state_destroy(rfs4_entry_t u_entry)2758 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2759 {
2760 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2761 
2762 	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2763 	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2764 	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2765 
2766 	rfs4_sw_destroy(&lsp->rls_sw);
2767 
2768 	/* Make sure to release the file locks */
2769 	if (lsp->rls_locks_cleaned == FALSE) {
2770 		lsp->rls_locks_cleaned = TRUE;
2771 		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2772 			/* Is the PxFS kernel module loaded? */
2773 			if (lm_remove_file_locks != NULL) {
2774 				int new_sysid;
2775 
2776 				/* Encode the cluster nodeid in new sysid */
2777 				new_sysid =
2778 				    lsp->rls_locker->rl_client->rc_sysidt;
2779 				lm_set_nlmid_flk(&new_sysid);
2780 
2781 				/*
2782 				 * This PxFS routine removes file locks for a
2783 				 * client over all nodes of a cluster.
2784 				 */
2785 				DTRACE_PROBE1(nfss_i_clust_rm_lck,
2786 				    int, new_sysid);
2787 				(*lm_remove_file_locks)(new_sysid);
2788 			} else {
2789 				(void) cleanlocks(
2790 				    lsp->rls_state->rs_finfo->rf_vp,
2791 				    lsp->rls_locker->rl_pid,
2792 				    lsp->rls_locker->rl_client->rc_sysidt);
2793 			}
2794 		}
2795 	}
2796 
2797 	/* Free the last reply for this state */
2798 	rfs4_free_reply(&lsp->rls_reply);
2799 
2800 	rfs4_lockowner_rele(lsp->rls_locker);
2801 	lsp->rls_locker = NULL;
2802 
2803 	rfs4_state_rele_nounlock(lsp->rls_state);
2804 	lsp->rls_state = NULL;
2805 }
2806 
2807 static bool_t
rfs4_lo_state_create(rfs4_entry_t u_entry,void * arg)2808 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2809 {
2810 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2811 	rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2812 	rfs4_lockowner_t *lo = argp->rls_locker;
2813 	rfs4_state_t *sp = argp->rls_state;
2814 
2815 	lsp->rls_state = sp;
2816 
2817 	lsp->rls_lockid = sp->rs_stateid;
2818 	lsp->rls_lockid.bits.type = LOCKID;
2819 	lsp->rls_lockid.bits.chgseq = 0;
2820 	lsp->rls_lockid.bits.pid = lo->rl_pid;
2821 
2822 	lsp->rls_locks_cleaned = FALSE;
2823 	lsp->rls_lock_completed = FALSE;
2824 
2825 	rfs4_sw_init(&lsp->rls_sw);
2826 
2827 	/* Attached the supplied lock owner */
2828 	rfs4_dbe_hold(lo->rl_dbe);
2829 	lsp->rls_locker = lo;
2830 
2831 	rfs4_dbe_lock(sp->rs_dbe);
2832 	list_insert_tail(&sp->rs_lostatelist, lsp);
2833 	rfs4_dbe_hold(sp->rs_dbe);
2834 	rfs4_dbe_unlock(sp->rs_dbe);
2835 
2836 	return (TRUE);
2837 }
2838 
2839 void
rfs4_lo_state_rele(rfs4_lo_state_t * lsp,bool_t unlock_fp)2840 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2841 {
2842 	if (unlock_fp == TRUE)
2843 		rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2844 	rfs4_dbe_rele(lsp->rls_dbe);
2845 }
2846 
2847 static rfs4_lo_state_t *
rfs4_findlo_state(stateid_t * id,bool_t lock_fp)2848 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2849 {
2850 	rfs4_lo_state_t *lsp;
2851 	bool_t create = FALSE;
2852 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2853 
2854 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2855 	    &create, NULL, RFS4_DBS_VALID);
2856 	if (lock_fp == TRUE && lsp != NULL)
2857 		rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2858 
2859 	return (lsp);
2860 }
2861 
2862 
2863 static uint32_t
lo_state_lo_hash(void * key)2864 lo_state_lo_hash(void *key)
2865 {
2866 	rfs4_lo_state_t *lsp = key;
2867 
2868 	return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2869 }
2870 
2871 static bool_t
lo_state_lo_compare(rfs4_entry_t u_entry,void * key)2872 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2873 {
2874 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2875 	rfs4_lo_state_t *keyp = key;
2876 
2877 	return (keyp->rls_locker == lsp->rls_locker &&
2878 	    keyp->rls_state == lsp->rls_state);
2879 }
2880 
2881 static void *
lo_state_lo_mkkey(rfs4_entry_t u_entry)2882 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2883 {
2884 	return (u_entry);
2885 }
2886 
2887 rfs4_lo_state_t *
rfs4_findlo_state_by_owner(rfs4_lockowner_t * lo,rfs4_state_t * sp,bool_t * create)2888 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2889     bool_t *create)
2890 {
2891 	rfs4_lo_state_t *lsp;
2892 	rfs4_lo_state_t arg;
2893 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
2894 
2895 	arg.rls_locker = lo;
2896 	arg.rls_state = sp;
2897 
2898 	lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2899 	    &arg, create, &arg, RFS4_DBS_VALID);
2900 
2901 	return (lsp);
2902 }
2903 
2904 static stateid_t
get_stateid(id_t eid)2905 get_stateid(id_t eid)
2906 {
2907 	stateid_t id;
2908 	nfs4_srv_t *nsrv4;
2909 
2910 	nsrv4 = nfs4_get_srv();
2911 
2912 	id.bits.boottime = nsrv4->rfs4_start_time;
2913 	id.bits.ident = eid;
2914 	id.bits.chgseq = 0;
2915 	id.bits.type = 0;
2916 	id.bits.pid = 0;
2917 
2918 	/*
2919 	 * If we are booted as a cluster node, embed our nodeid.
2920 	 * We've already done sanity checks in rfs4_client_create() so no
2921 	 * need to repeat them here.
2922 	 */
2923 	id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2924 	    clconf_get_nodeid() : 0;
2925 
2926 	return (id);
2927 }
2928 
2929 /*
2930  * For use only when booted as a cluster node.
2931  * Returns TRUE if the embedded nodeid indicates that this stateid was
2932  * generated on another node.
2933  */
2934 static int
foreign_stateid(stateid_t * id)2935 foreign_stateid(stateid_t *id)
2936 {
2937 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2938 	return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2939 }
2940 
2941 /*
2942  * For use only when booted as a cluster node.
2943  * Returns TRUE if the embedded nodeid indicates that this clientid was
2944  * generated on another node.
2945  */
2946 static int
foreign_clientid(cid * cidp)2947 foreign_clientid(cid *cidp)
2948 {
2949 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2950 	return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2951 	    (uint32_t)clconf_get_nodeid());
2952 }
2953 
2954 /*
2955  * For use only when booted as a cluster node.
2956  * Embed our cluster nodeid into the clientid.
2957  */
2958 static void
embed_nodeid(cid * cidp)2959 embed_nodeid(cid *cidp)
2960 {
2961 	int clnodeid;
2962 	/*
2963 	 * Currently, our state tables are small enough that their
2964 	 * ids will leave enough bits free for the nodeid. If the
2965 	 * tables become larger, we mustn't overwrite the id.
2966 	 * Equally, we only have room for so many bits of nodeid, so
2967 	 * must check that too.
2968 	 */
2969 	ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2970 	ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2971 	clnodeid = clconf_get_nodeid();
2972 	ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2973 	ASSERT(clnodeid != NODEID_UNKNOWN);
2974 	cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2975 }
2976 
2977 static uint32_t
state_hash(void * key)2978 state_hash(void *key)
2979 {
2980 	stateid_t *ip = (stateid_t *)key;
2981 
2982 	return (ip->bits.ident);
2983 }
2984 
2985 static bool_t
state_compare(rfs4_entry_t u_entry,void * key)2986 state_compare(rfs4_entry_t u_entry, void *key)
2987 {
2988 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2989 	stateid_t *id = (stateid_t *)key;
2990 	bool_t rc;
2991 
2992 	rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2993 	    sp->rs_stateid.bits.ident == id->bits.ident);
2994 
2995 	return (rc);
2996 }
2997 
2998 static void *
state_mkkey(rfs4_entry_t u_entry)2999 state_mkkey(rfs4_entry_t u_entry)
3000 {
3001 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3002 
3003 	return (&sp->rs_stateid);
3004 }
3005 
3006 static void
rfs4_state_destroy(rfs4_entry_t u_entry)3007 rfs4_state_destroy(rfs4_entry_t u_entry)
3008 {
3009 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3010 
3011 	/* remove from openowner list */
3012 	rfs4_dbe_lock(sp->rs_owner->ro_dbe);
3013 	list_remove(&sp->rs_owner->ro_statelist, sp);
3014 	rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
3015 
3016 	list_destroy(&sp->rs_lostatelist);
3017 
3018 	/* release any share locks for this stateid if it's still open */
3019 	if (!sp->rs_closed) {
3020 		rfs4_dbe_lock(sp->rs_dbe);
3021 		(void) rfs4_unshare(sp);
3022 		rfs4_dbe_unlock(sp->rs_dbe);
3023 	}
3024 
3025 	/* Were done with the file */
3026 	rfs4_file_rele(sp->rs_finfo);
3027 	sp->rs_finfo = NULL;
3028 
3029 	/* And now with the openowner */
3030 	rfs4_openowner_rele(sp->rs_owner);
3031 	sp->rs_owner = NULL;
3032 }
3033 
3034 void
rfs4_state_rele_nounlock(rfs4_state_t * sp)3035 rfs4_state_rele_nounlock(rfs4_state_t *sp)
3036 {
3037 	rfs4_dbe_rele(sp->rs_dbe);
3038 }
3039 
3040 void
rfs4_state_rele(rfs4_state_t * sp)3041 rfs4_state_rele(rfs4_state_t *sp)
3042 {
3043 	rw_exit(&sp->rs_finfo->rf_file_rwlock);
3044 	rfs4_dbe_rele(sp->rs_dbe);
3045 }
3046 
3047 static uint32_t
deleg_hash(void * key)3048 deleg_hash(void *key)
3049 {
3050 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
3051 
3052 	return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
3053 }
3054 
3055 static bool_t
deleg_compare(rfs4_entry_t u_entry,void * key)3056 deleg_compare(rfs4_entry_t u_entry, void *key)
3057 {
3058 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3059 	rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
3060 
3061 	return (dsp->rds_client == kdsp->rds_client &&
3062 	    dsp->rds_finfo == kdsp->rds_finfo);
3063 }
3064 
3065 static void *
deleg_mkkey(rfs4_entry_t u_entry)3066 deleg_mkkey(rfs4_entry_t u_entry)
3067 {
3068 	return (u_entry);
3069 }
3070 
3071 static uint32_t
deleg_state_hash(void * key)3072 deleg_state_hash(void *key)
3073 {
3074 	stateid_t *ip = (stateid_t *)key;
3075 
3076 	return (ip->bits.ident);
3077 }
3078 
3079 static bool_t
deleg_state_compare(rfs4_entry_t u_entry,void * key)3080 deleg_state_compare(rfs4_entry_t u_entry, void *key)
3081 {
3082 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3083 	stateid_t *id = (stateid_t *)key;
3084 	bool_t rc;
3085 
3086 	if (id->bits.type != DELEGID)
3087 		return (FALSE);
3088 
3089 	rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
3090 	    dsp->rds_delegid.bits.ident == id->bits.ident);
3091 
3092 	return (rc);
3093 }
3094 
3095 static void *
deleg_state_mkkey(rfs4_entry_t u_entry)3096 deleg_state_mkkey(rfs4_entry_t u_entry)
3097 {
3098 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3099 
3100 	return (&dsp->rds_delegid);
3101 }
3102 
3103 static bool_t
rfs4_deleg_state_expiry(rfs4_entry_t u_entry)3104 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3105 {
3106 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3107 
3108 	if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3109 		return (TRUE);
3110 
3111 	if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3112 		return (TRUE);
3113 
3114 	if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3115 	    > rfs4_lease_time)) {
3116 		rfs4_dbe_invalidate(dsp->rds_dbe);
3117 		return (TRUE);
3118 	}
3119 
3120 	return (FALSE);
3121 }
3122 
3123 static bool_t
rfs4_deleg_state_create(rfs4_entry_t u_entry,void * argp)3124 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3125 {
3126 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3127 	rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3128 	rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3129 
3130 	rfs4_dbe_hold(fp->rf_dbe);
3131 	rfs4_dbe_hold(cp->rc_dbe);
3132 
3133 	dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3134 	dsp->rds_delegid.bits.type = DELEGID;
3135 	dsp->rds_finfo = fp;
3136 	dsp->rds_client = cp;
3137 	dsp->rds_dtype = OPEN_DELEGATE_NONE;
3138 
3139 	dsp->rds_time_granted = gethrestime_sec();	/* observability */
3140 	dsp->rds_time_revoked = 0;
3141 
3142 	list_link_init(&dsp->rds_node);
3143 
3144 	return (TRUE);
3145 }
3146 
3147 static void
rfs4_deleg_state_destroy(rfs4_entry_t u_entry)3148 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3149 {
3150 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3151 
3152 	/* return delegation if necessary */
3153 	rfs4_return_deleg(dsp, FALSE);
3154 
3155 	/* Were done with the file */
3156 	rfs4_file_rele(dsp->rds_finfo);
3157 	dsp->rds_finfo = NULL;
3158 
3159 	/* And now with the openowner */
3160 	rfs4_client_rele(dsp->rds_client);
3161 	dsp->rds_client = NULL;
3162 }
3163 
3164 rfs4_deleg_state_t *
rfs4_finddeleg(rfs4_state_t * sp,bool_t * create)3165 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3166 {
3167 	rfs4_deleg_state_t ds, *dsp;
3168 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3169 
3170 	ds.rds_client = sp->rs_owner->ro_client;
3171 	ds.rds_finfo = sp->rs_finfo;
3172 
3173 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3174 	    create, &ds, RFS4_DBS_VALID);
3175 
3176 	return (dsp);
3177 }
3178 
3179 rfs4_deleg_state_t *
rfs4_finddelegstate(stateid_t * id)3180 rfs4_finddelegstate(stateid_t *id)
3181 {
3182 	rfs4_deleg_state_t *dsp;
3183 	bool_t create = FALSE;
3184 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3185 
3186 	dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3187 	    id, &create, NULL, RFS4_DBS_VALID);
3188 
3189 	return (dsp);
3190 }
3191 
3192 void
rfs4_deleg_state_rele(rfs4_deleg_state_t * dsp)3193 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3194 {
3195 	rfs4_dbe_rele(dsp->rds_dbe);
3196 }
3197 
3198 void
rfs4_update_lock_sequence(rfs4_lo_state_t * lsp)3199 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3200 {
3201 
3202 	rfs4_dbe_lock(lsp->rls_dbe);
3203 
3204 	/*
3205 	 * If we are skipping sequence id checking, this means that
3206 	 * this is the first lock request and therefore the sequence
3207 	 * id does not need to be updated.  This only happens on the
3208 	 * first lock request for a lockowner
3209 	 */
3210 	if (!lsp->rls_skip_seqid_check)
3211 		lsp->rls_seqid++;
3212 
3213 	rfs4_dbe_unlock(lsp->rls_dbe);
3214 }
3215 
3216 void
rfs4_update_lock_resp(rfs4_lo_state_t * lsp,nfs_resop4 * resp)3217 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3218 {
3219 
3220 	rfs4_dbe_lock(lsp->rls_dbe);
3221 
3222 	rfs4_free_reply(&lsp->rls_reply);
3223 
3224 	rfs4_copy_reply(&lsp->rls_reply, resp);
3225 
3226 	rfs4_dbe_unlock(lsp->rls_dbe);
3227 }
3228 
3229 void
rfs4_free_opens(rfs4_openowner_t * oo,bool_t invalidate,bool_t close_of_client)3230 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3231     bool_t close_of_client)
3232 {
3233 	rfs4_state_t *sp;
3234 
3235 	rfs4_dbe_lock(oo->ro_dbe);
3236 
3237 	for (sp = list_head(&oo->ro_statelist); sp != NULL;
3238 	    sp = list_next(&oo->ro_statelist, sp)) {
3239 		rfs4_state_close(sp, FALSE, close_of_client, CRED());
3240 		if (invalidate == TRUE)
3241 			rfs4_dbe_invalidate(sp->rs_dbe);
3242 	}
3243 
3244 	rfs4_dbe_invalidate(oo->ro_dbe);
3245 	rfs4_dbe_unlock(oo->ro_dbe);
3246 }
3247 
3248 static uint32_t
state_owner_file_hash(void * key)3249 state_owner_file_hash(void *key)
3250 {
3251 	rfs4_state_t *sp = key;
3252 
3253 	return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3254 }
3255 
3256 static bool_t
state_owner_file_compare(rfs4_entry_t u_entry,void * key)3257 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3258 {
3259 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3260 	rfs4_state_t *arg = key;
3261 
3262 	if (sp->rs_closed == TRUE)
3263 		return (FALSE);
3264 
3265 	return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3266 }
3267 
3268 static void *
state_owner_file_mkkey(rfs4_entry_t u_entry)3269 state_owner_file_mkkey(rfs4_entry_t u_entry)
3270 {
3271 	return (u_entry);
3272 }
3273 
3274 static uint32_t
state_file_hash(void * key)3275 state_file_hash(void *key)
3276 {
3277 	return (ADDRHASH(key));
3278 }
3279 
3280 static bool_t
state_file_compare(rfs4_entry_t u_entry,void * key)3281 state_file_compare(rfs4_entry_t u_entry, void *key)
3282 {
3283 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3284 	rfs4_file_t *fp = key;
3285 
3286 	if (sp->rs_closed == TRUE)
3287 		return (FALSE);
3288 
3289 	return (fp == sp->rs_finfo);
3290 }
3291 
3292 static void *
state_file_mkkey(rfs4_entry_t u_entry)3293 state_file_mkkey(rfs4_entry_t u_entry)
3294 {
3295 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3296 
3297 	return (sp->rs_finfo);
3298 }
3299 
3300 rfs4_state_t *
rfs4_findstate_by_owner_file(rfs4_openowner_t * oo,rfs4_file_t * fp,bool_t * create)3301 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3302     bool_t *create)
3303 {
3304 	rfs4_state_t *sp;
3305 	rfs4_state_t key;
3306 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3307 
3308 	key.rs_owner = oo;
3309 	key.rs_finfo = fp;
3310 
3311 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3312 	    &key, create, &key, RFS4_DBS_VALID);
3313 
3314 	return (sp);
3315 }
3316 
3317 /* This returns ANY state struct that refers to this file */
3318 static rfs4_state_t *
rfs4_findstate_by_file(rfs4_file_t * fp)3319 rfs4_findstate_by_file(rfs4_file_t *fp)
3320 {
3321 	bool_t create = FALSE;
3322 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3323 
3324 	return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3325 	    &create, fp, RFS4_DBS_VALID));
3326 }
3327 
3328 static bool_t
rfs4_state_expiry(rfs4_entry_t u_entry)3329 rfs4_state_expiry(rfs4_entry_t u_entry)
3330 {
3331 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3332 
3333 	if (rfs4_dbe_is_invalid(sp->rs_dbe))
3334 		return (TRUE);
3335 
3336 	if (sp->rs_closed == TRUE &&
3337 	    ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3338 	    > rfs4_lease_time))
3339 		return (TRUE);
3340 
3341 	return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3342 	    > rfs4_lease_time));
3343 }
3344 
3345 static bool_t
rfs4_state_create(rfs4_entry_t u_entry,void * argp)3346 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3347 {
3348 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3349 	rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3350 	rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3351 
3352 	rfs4_dbe_hold(fp->rf_dbe);
3353 	rfs4_dbe_hold(oo->ro_dbe);
3354 	sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3355 	sp->rs_stateid.bits.type = OPENID;
3356 	sp->rs_owner = oo;
3357 	sp->rs_finfo = fp;
3358 
3359 	list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3360 	    offsetof(rfs4_lo_state_t, rls_node));
3361 
3362 	/* Insert state on per open owner's list */
3363 	rfs4_dbe_lock(oo->ro_dbe);
3364 	list_insert_tail(&oo->ro_statelist, sp);
3365 	rfs4_dbe_unlock(oo->ro_dbe);
3366 
3367 	return (TRUE);
3368 }
3369 
3370 static rfs4_state_t *
rfs4_findstate(stateid_t * id,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3371 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3372 {
3373 	rfs4_state_t *sp;
3374 	bool_t create = FALSE;
3375 	nfs4_srv_t *nsrv4 = nfs4_get_srv();
3376 
3377 	sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3378 	    &create, NULL, find_invalid);
3379 	if (lock_fp == TRUE && sp != NULL)
3380 		rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3381 
3382 	return (sp);
3383 }
3384 
3385 void
rfs4_state_close(rfs4_state_t * sp,bool_t lock_held,bool_t close_of_client,cred_t * cr)3386 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3387     cred_t *cr)
3388 {
3389 	/* Remove the associated lo_state owners */
3390 	if (!lock_held)
3391 		rfs4_dbe_lock(sp->rs_dbe);
3392 
3393 	/*
3394 	 * If refcnt == 0, the dbe is about to be destroyed.
3395 	 * lock state will be released by the reaper thread.
3396 	 */
3397 
3398 	if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3399 		if (sp->rs_closed == FALSE) {
3400 			rfs4_release_share_lock_state(sp, cr, close_of_client);
3401 			sp->rs_closed = TRUE;
3402 		}
3403 	}
3404 
3405 	if (!lock_held)
3406 		rfs4_dbe_unlock(sp->rs_dbe);
3407 }
3408 
3409 /*
3410  * Remove all state associated with the given client.
3411  */
3412 void
rfs4_client_state_remove(rfs4_client_t * cp)3413 rfs4_client_state_remove(rfs4_client_t *cp)
3414 {
3415 	rfs4_openowner_t *oo;
3416 
3417 	rfs4_dbe_lock(cp->rc_dbe);
3418 
3419 	for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3420 	    oo = list_next(&cp->rc_openownerlist, oo)) {
3421 		rfs4_free_opens(oo, TRUE, TRUE);
3422 	}
3423 
3424 	rfs4_dbe_unlock(cp->rc_dbe);
3425 }
3426 
3427 void
rfs4_client_close(rfs4_client_t * cp)3428 rfs4_client_close(rfs4_client_t *cp)
3429 {
3430 	/* Mark client as going away. */
3431 	rfs4_dbe_lock(cp->rc_dbe);
3432 	rfs4_dbe_invalidate(cp->rc_dbe);
3433 	rfs4_dbe_unlock(cp->rc_dbe);
3434 
3435 	rfs4_client_state_remove(cp);
3436 	rfs4x_client_session_remove(cp);
3437 
3438 	/* Release the client */
3439 	rfs4_client_rele(cp);
3440 }
3441 
3442 nfsstat4
rfs4_check_clientid(clientid4 * cp,int setclid_confirm)3443 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3444 {
3445 	cid *cidp = (cid *) cp;
3446 	nfs4_srv_t *nsrv4;
3447 
3448 	nsrv4 = nfs4_get_srv();
3449 
3450 	/*
3451 	 * If we are booted as a cluster node, check the embedded nodeid.
3452 	 * If it indicates that this clientid was generated on another node,
3453 	 * inform the client accordingly.
3454 	 */
3455 	if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3456 		return (NFS4ERR_STALE_CLIENTID);
3457 
3458 	/*
3459 	 * If the server start time matches the time provided
3460 	 * by the client (via the clientid) and this is NOT a
3461 	 * setclientid_confirm then return EXPIRED.
3462 	 */
3463 	if (!setclid_confirm &&
3464 	    cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3465 		return (NFS4ERR_EXPIRED);
3466 
3467 	return (NFS4ERR_STALE_CLIENTID);
3468 }
3469 
3470 /*
3471  * This is used when a stateid has not been found amongst the
3472  * current server's state.  Check the stateid to see if it
3473  * was from this server instantiation or not.
3474  */
3475 static nfsstat4
what_stateid_error(stateid_t * id,stateid_type_t type)3476 what_stateid_error(stateid_t *id, stateid_type_t type)
3477 {
3478 	nfs4_srv_t *nsrv4;
3479 
3480 	nsrv4 = nfs4_get_srv();
3481 
3482 	/* If we are booted as a cluster node, was stateid locally generated? */
3483 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3484 		return (NFS4ERR_STALE_STATEID);
3485 
3486 	/* If types don't match then no use checking further */
3487 	if (type != id->bits.type)
3488 		return (NFS4ERR_BAD_STATEID);
3489 
3490 	/* From a different server instantiation, return STALE */
3491 	if (id->bits.boottime != nsrv4->rfs4_start_time)
3492 		return (NFS4ERR_STALE_STATEID);
3493 
3494 	/*
3495 	 * From this server but the state is most likely beyond lease
3496 	 * timeout: return NFS4ERR_EXPIRED.  However, there is the
3497 	 * case of a delegation stateid.  For delegations, there is a
3498 	 * case where the state can be removed without the client's
3499 	 * knowledge/consent: revocation.  In the case of delegation
3500 	 * revocation, the delegation state will be removed and will
3501 	 * not be found.  If the client does something like a
3502 	 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3503 	 * that has been revoked, the server should return BAD_STATEID
3504 	 * instead of the more common EXPIRED error.
3505 	 */
3506 	if (id->bits.boottime == nsrv4->rfs4_start_time) {
3507 		if (type == DELEGID)
3508 			return (NFS4ERR_BAD_STATEID);
3509 		else
3510 			return (NFS4ERR_EXPIRED);
3511 	}
3512 
3513 	return (NFS4ERR_BAD_STATEID);
3514 }
3515 
3516 /*
3517  * Used later on to find the various state structs.  When called from
3518  * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3519  * taken (it is not needed) and helps on the read/write path with
3520  * respect to performance.
3521  */
3522 static nfsstat4
rfs4_get_state_lockit(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3523 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3524     rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3525 {
3526 	stateid_t *id = (stateid_t *)stateid;
3527 	rfs4_state_t *sp;
3528 
3529 	*spp = NULL;
3530 
3531 	/* If we are booted as a cluster node, was stateid locally generated? */
3532 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3533 		return (NFS4ERR_STALE_STATEID);
3534 
3535 	sp = rfs4_findstate(id, find_invalid, lock_fp);
3536 	if (sp == NULL) {
3537 		return (what_stateid_error(id, OPENID));
3538 	}
3539 
3540 	if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3541 		if (lock_fp == TRUE)
3542 			rfs4_state_rele(sp);
3543 		else
3544 			rfs4_state_rele_nounlock(sp);
3545 		return (NFS4ERR_EXPIRED);
3546 	}
3547 
3548 	*spp = sp;
3549 
3550 	return (NFS4_OK);
3551 }
3552 
3553 nfsstat4
rfs4_get_state(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3554 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3555     rfs4_dbsearch_type_t find_invalid)
3556 {
3557 	return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3558 }
3559 
3560 nfsstat4
rfs4_get_state_nolock(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3561 rfs4_get_state_nolock(stateid4 *stateid, rfs4_state_t **spp,
3562     rfs4_dbsearch_type_t find_invalid)
3563 {
3564 	return (rfs4_get_state_lockit(stateid, spp, find_invalid, FALSE));
3565 }
3566 
3567 int
rfs4_check_stateid_seqid(rfs4_state_t * sp,stateid4 * stateid,const compound_state_t * cs)3568 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid,
3569     const compound_state_t *cs)
3570 {
3571 	stateid_t *id = (stateid_t *)stateid;
3572 	bool_t has_session = rfs4_has_session(cs);
3573 
3574 	if (rfs4_lease_expired(sp->rs_owner->ro_client))
3575 		return (NFS4_CHECK_STATEID_EXPIRED);
3576 
3577 	if (has_session && id->bits.chgseq == 0)
3578 		return (NFS4_CHECK_STATEID_OKAY);
3579 
3580 	/* Stateid is some time in the future - that's bad */
3581 	if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3582 		return (NFS4_CHECK_STATEID_BAD);
3583 
3584 	if (!has_session &&
3585 	    sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1) {
3586 		return (NFS4_CHECK_STATEID_REPLAY);
3587 	}
3588 
3589 	/* Stateid is some time in the past - that's old */
3590 	if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3591 		return (NFS4_CHECK_STATEID_OLD);
3592 
3593 	/* Caller needs to know about confirmation before closure */
3594 	if (sp->rs_owner->ro_need_confirm)
3595 		return (NFS4_CHECK_STATEID_UNCONFIRMED);
3596 
3597 	if (sp->rs_closed == TRUE)
3598 		return (NFS4_CHECK_STATEID_CLOSED);
3599 
3600 	return (NFS4_CHECK_STATEID_OKAY);
3601 }
3602 
3603 int
rfs4_check_lo_stateid_seqid(rfs4_lo_state_t * lsp,stateid4 * stateid,const compound_state_t * cs)3604 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid,
3605     const compound_state_t *cs)
3606 {
3607 	stateid_t *id = (stateid_t *)stateid;
3608 	bool_t has_session = rfs4_has_session(cs);
3609 
3610 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3611 		return (NFS4_CHECK_STATEID_EXPIRED);
3612 
3613 	if (has_session && id->bits.chgseq == 0)
3614 		return (NFS4_CHECK_STATEID_OKAY);
3615 
3616 	/* Stateid is some time in the future - that's bad */
3617 	if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3618 		return (NFS4_CHECK_STATEID_BAD);
3619 
3620 	if (!has_session &&
3621 	    lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1) {
3622 		return (NFS4_CHECK_STATEID_REPLAY);
3623 	}
3624 
3625 	/* Stateid is some time in the past - that's old */
3626 	if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3627 		return (NFS4_CHECK_STATEID_OLD);
3628 
3629 	if (lsp->rls_state->rs_closed == TRUE)
3630 		return (NFS4_CHECK_STATEID_CLOSED);
3631 
3632 	return (NFS4_CHECK_STATEID_OKAY);
3633 }
3634 
3635 nfsstat4
rfs4_get_deleg_state(stateid4 * stateid,rfs4_deleg_state_t ** dspp)3636 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3637 {
3638 	stateid_t *id = (stateid_t *)stateid;
3639 	rfs4_deleg_state_t *dsp;
3640 
3641 	*dspp = NULL;
3642 
3643 	/* If we are booted as a cluster node, was stateid locally generated? */
3644 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3645 		return (NFS4ERR_STALE_STATEID);
3646 
3647 	dsp = rfs4_finddelegstate(id);
3648 	if (dsp == NULL) {
3649 		return (what_stateid_error(id, DELEGID));
3650 	}
3651 
3652 	if (rfs4_lease_expired(dsp->rds_client)) {
3653 		rfs4_deleg_state_rele(dsp);
3654 		return (NFS4ERR_EXPIRED);
3655 	}
3656 
3657 	*dspp = dsp;
3658 
3659 	return (NFS4_OK);
3660 }
3661 
3662 nfsstat4
rfs4_get_lo_state(stateid4 * stateid,rfs4_lo_state_t ** lspp,bool_t lock_fp)3663 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3664 {
3665 	stateid_t *id = (stateid_t *)stateid;
3666 	rfs4_lo_state_t *lsp;
3667 
3668 	*lspp = NULL;
3669 
3670 	/* If we are booted as a cluster node, was stateid locally generated? */
3671 	if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3672 		return (NFS4ERR_STALE_STATEID);
3673 
3674 	lsp = rfs4_findlo_state(id, lock_fp);
3675 	if (lsp == NULL) {
3676 		return (what_stateid_error(id, LOCKID));
3677 	}
3678 
3679 	if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3680 		rfs4_lo_state_rele(lsp, lock_fp);
3681 		return (NFS4ERR_EXPIRED);
3682 	}
3683 
3684 	*lspp = lsp;
3685 
3686 	return (NFS4_OK);
3687 }
3688 
3689 static nfsstat4
rfs4_get_all_state(stateid4 * sid,rfs4_state_t ** spp,rfs4_deleg_state_t ** dspp,rfs4_lo_state_t ** lspp)3690 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3691     rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3692 {
3693 	rfs4_state_t *sp = NULL;
3694 	rfs4_deleg_state_t *dsp = NULL;
3695 	rfs4_lo_state_t *lsp = NULL;
3696 	stateid_t *id;
3697 	nfsstat4 status;
3698 
3699 	*spp = NULL; *dspp = NULL; *lspp = NULL;
3700 
3701 	id = (stateid_t *)sid;
3702 	switch (id->bits.type) {
3703 	case OPENID:
3704 		status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3705 		break;
3706 	case DELEGID:
3707 		status = rfs4_get_deleg_state(sid, &dsp);
3708 		break;
3709 	case LOCKID:
3710 		status = rfs4_get_lo_state(sid, &lsp, FALSE);
3711 		if (status == NFS4_OK) {
3712 			sp = lsp->rls_state;
3713 			rfs4_dbe_hold(sp->rs_dbe);
3714 		}
3715 		break;
3716 	default:
3717 		status = NFS4ERR_BAD_STATEID;
3718 	}
3719 
3720 	if (status == NFS4_OK) {
3721 		*spp = sp;
3722 		*dspp = dsp;
3723 		*lspp = lsp;
3724 	}
3725 
3726 	return (status);
3727 }
3728 
3729 /*
3730  * Given the I/O mode (FREAD or FWRITE), this checks whether the
3731  * rfs4_state_t struct has access to do this operation and if so
3732  * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3733  */
3734 nfsstat4
rfs4_state_has_access(rfs4_state_t * sp,int mode,vnode_t * vp)3735 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3736 {
3737 	nfsstat4 stat = NFS4_OK;
3738 	rfs4_file_t *fp;
3739 	bool_t create = FALSE;
3740 
3741 	rfs4_dbe_lock(sp->rs_dbe);
3742 	if (mode == FWRITE) {
3743 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3744 			stat = NFS4ERR_OPENMODE;
3745 		}
3746 	} else if (mode == FREAD) {
3747 		if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3748 			/*
3749 			 * If we have OPENed the file with DENYing access
3750 			 * to both READ and WRITE then no one else could
3751 			 * have OPENed the file, hence no conflicting READ
3752 			 * deny.  This check is merely an optimization.
3753 			 */
3754 			if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3755 				goto out;
3756 
3757 			/* Check against file struct's DENY mode */
3758 			fp = rfs4_findfile(vp, NULL, &create);
3759 			if (fp != NULL) {
3760 				int deny_read = 0;
3761 				rfs4_dbe_lock(fp->rf_dbe);
3762 				/*
3763 				 * Check if any other open owner has the file
3764 				 * OPENed with deny READ.
3765 				 */
3766 				if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3767 					deny_read = 1;
3768 				ASSERT(fp->rf_deny_read >= deny_read);
3769 				if (fp->rf_deny_read > deny_read)
3770 					stat = NFS4ERR_OPENMODE;
3771 				rfs4_dbe_unlock(fp->rf_dbe);
3772 				rfs4_file_rele(fp);
3773 			}
3774 		}
3775 	} else {
3776 		/* Illegal I/O mode */
3777 		stat = NFS4ERR_INVAL;
3778 	}
3779 out:
3780 	rfs4_dbe_unlock(sp->rs_dbe);
3781 	return (stat);
3782 }
3783 
3784 static nfsstat4
check_state_seqid(stateid_t * st,stateid_t * in,bool_t has_session)3785 check_state_seqid(stateid_t *st, stateid_t *in, bool_t has_session)
3786 {
3787 	/* rfc56661, section 8.2.2, "seqid to zero" */
3788 	if (has_session && in->bits.chgseq == 0)
3789 		return (NFS4_OK);
3790 
3791 	/* Seqid in the future? - that's bad */
3792 	if (st->bits.chgseq < in->bits.chgseq)
3793 		return (NFS4ERR_BAD_STATEID);
3794 
3795 	/* Seqid in the past? - that's old */
3796 	if (st->bits.chgseq > in->bits.chgseq)
3797 		return (NFS4ERR_OLD_STATEID);
3798 
3799 	return (NFS4_OK);
3800 }
3801 
3802 /*
3803  * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3804  * the file is being truncated, return NFS4_OK if allowed or appropriate
3805  * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3806  * the associated file will be done if the I/O is not consistent with any
3807  * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3808  * as reader or writer as appropriate. rfs4_op_open will acquire the
3809  * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3810  * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3811  * deleg parameter, we will return whether a write delegation is held by
3812  * the client associated with this stateid.
3813  * If the server instance associated with the relevant client is in its
3814  * grace period, return NFS4ERR_GRACE.
3815  */
3816 
3817 nfsstat4
rfs4_check_stateid(int mode,vnode_t * vp,stateid4 * stateid,bool_t trunc,bool_t * deleg,bool_t do_access,caller_context_t * ct,compound_state_t * cs)3818 rfs4_check_stateid(int mode, vnode_t *vp,
3819     stateid4 *stateid, bool_t trunc, bool_t *deleg,
3820     bool_t do_access, caller_context_t *ct, compound_state_t *cs)
3821 {
3822 	rfs4_file_t *fp;
3823 	bool_t create = FALSE;
3824 	rfs4_state_t *sp;
3825 	rfs4_deleg_state_t *dsp;
3826 	rfs4_lo_state_t *lsp;
3827 	stateid_t *id = (stateid_t *)stateid;
3828 	nfsstat4 stat = NFS4_OK;
3829 	bool_t use_ss = rfs4_has_session(cs);
3830 
3831 	if (ct != NULL) {
3832 		ct->cc_sysid = 0;
3833 		ct->cc_pid = 0;
3834 		ct->cc_caller_id = nfs4_srv_caller_id;
3835 		ct->cc_flags = CC_DONTBLOCK;
3836 	}
3837 
3838 	if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
3839 		fp = rfs4_findfile(vp, NULL, &create);
3840 		if (fp == NULL)
3841 			return (NFS4_OK);
3842 
3843 		if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3844 			rfs4_file_rele(fp);
3845 			return (NFS4_OK);
3846 		}
3847 		if (mode == FWRITE ||
3848 		    fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3849 			rfs4_recall_deleg(fp, trunc, NULL);
3850 			rfs4_file_rele(fp);
3851 			return (NFS4ERR_DELAY);
3852 		}
3853 		rfs4_file_rele(fp);
3854 		return (NFS4_OK);
3855 	} else {
3856 		stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3857 		if (stat != NFS4_OK)
3858 			return (stat);
3859 
3860 		if (lsp != NULL) {
3861 			/* Is associated server instance in its grace period? */
3862 			if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3863 				rfs4_lo_state_rele(lsp, FALSE);
3864 				if (sp != NULL)
3865 					rfs4_state_rele_nounlock(sp);
3866 				return (NFS4ERR_GRACE);
3867 			}
3868 
3869 			ASSERT(id->bits.type == LOCKID);
3870 			stat = check_state_seqid(&lsp->rls_lockid, id, use_ss);
3871 			if (stat) {
3872 				rfs4_lo_state_rele(lsp, FALSE);
3873 				if (sp)
3874 					rfs4_state_rele_nounlock(sp);
3875 				return (stat);
3876 			}
3877 
3878 			/* Ensure specified filehandle matches */
3879 			if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3880 				rfs4_lo_state_rele(lsp, FALSE);
3881 				if (sp != NULL)
3882 					rfs4_state_rele_nounlock(sp);
3883 				return (NFS4ERR_BAD_STATEID);
3884 			}
3885 
3886 			if (ct != NULL) {
3887 				ct->cc_sysid =
3888 				    lsp->rls_locker->rl_client->rc_sysidt;
3889 				ct->cc_pid = lsp->rls_locker->rl_pid;
3890 			}
3891 			rfs4_lo_state_rele(lsp, FALSE);
3892 		}
3893 
3894 		/* Stateid provided was an "open" stateid */
3895 		if (sp != NULL) {
3896 			/* Is associated server instance in its grace period? */
3897 			if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3898 				rfs4_state_rele_nounlock(sp);
3899 				return (NFS4ERR_GRACE);
3900 			}
3901 			/* Skip if is here via the LOCKID */
3902 			if (id->bits.type == OPENID) {
3903 				stat = check_state_seqid(&sp->rs_stateid, id,
3904 				    use_ss);
3905 				if (stat) {
3906 					rfs4_state_rele_nounlock(sp);
3907 					return (stat);
3908 				}
3909 			}
3910 			/* Ensure specified filehandle matches */
3911 			if (sp->rs_finfo->rf_vp != vp) {
3912 				rfs4_state_rele_nounlock(sp);
3913 				return (NFS4ERR_BAD_STATEID);
3914 			}
3915 
3916 			if (sp->rs_owner->ro_need_confirm) {
3917 				rfs4_state_rele_nounlock(sp);
3918 				return (NFS4ERR_BAD_STATEID);
3919 			}
3920 
3921 			if (sp->rs_closed == TRUE) {
3922 				rfs4_state_rele_nounlock(sp);
3923 				return (NFS4ERR_OLD_STATEID);
3924 			}
3925 
3926 			if (do_access)
3927 				stat = rfs4_state_has_access(sp, mode, vp);
3928 			else
3929 				stat = NFS4_OK;
3930 
3931 			/*
3932 			 * Return whether this state has write
3933 			 * delegation if desired
3934 			 */
3935 			if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3936 			    OPEN_DELEGATE_WRITE))
3937 				*deleg = TRUE;
3938 
3939 			/*
3940 			 * We got a valid stateid, so we update the
3941 			 * lease on the client. Ideally we would like
3942 			 * to do this after the calling op succeeds,
3943 			 * but for now this will be good
3944 			 * enough. Callers of this routine are
3945 			 * currently insulated from the state stuff.
3946 			 */
3947 			rfs4_update_lease(sp->rs_owner->ro_client);
3948 
3949 			/*
3950 			 * If a delegation is present on this file and
3951 			 * this is a WRITE, then update the lastwrite
3952 			 * time to indicate that activity is present.
3953 			 */
3954 			if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3955 			    OPEN_DELEGATE_WRITE &&
3956 			    mode == FWRITE) {
3957 				sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3958 				    gethrestime_sec();
3959 			}
3960 
3961 			/* Fill context for possible nbmand check */
3962 			if (ct != NULL && ct->cc_pid == 0) {
3963 				ct->cc_sysid =
3964 				    sp->rs_owner->ro_client->rc_sysidt;
3965 				ct->cc_pid =
3966 				    rfs4_dbe_getid(sp->rs_owner->ro_dbe);
3967 			}
3968 
3969 			rfs4_state_rele_nounlock(sp);
3970 
3971 			return (stat);
3972 		}
3973 
3974 		if (dsp != NULL) {
3975 			/* Is associated server instance in its grace period? */
3976 			if (rfs4_clnt_in_grace(dsp->rds_client)) {
3977 				rfs4_deleg_state_rele(dsp);
3978 				return (NFS4ERR_GRACE);
3979 			}
3980 
3981 			stat = check_state_seqid(&dsp->rds_delegid, id, use_ss);
3982 			if (stat) {
3983 				rfs4_deleg_state_rele(dsp);
3984 				return (stat);
3985 			}
3986 
3987 			/* Ensure specified filehandle matches */
3988 			if (dsp->rds_finfo->rf_vp != vp) {
3989 				rfs4_deleg_state_rele(dsp);
3990 				return (NFS4ERR_BAD_STATEID);
3991 			}
3992 			/*
3993 			 * Return whether this state has write
3994 			 * delegation if desired
3995 			 */
3996 			if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3997 			    OPEN_DELEGATE_WRITE))
3998 				*deleg = TRUE;
3999 
4000 			rfs4_update_lease(dsp->rds_client);
4001 
4002 			/*
4003 			 * If a delegation is present on this file and
4004 			 * this is a WRITE, then update the lastwrite
4005 			 * time to indicate that activity is present.
4006 			 */
4007 			if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
4008 			    OPEN_DELEGATE_WRITE && mode == FWRITE) {
4009 				dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
4010 				    gethrestime_sec();
4011 			}
4012 
4013 			/*
4014 			 * XXX - what happens if this is a WRITE and the
4015 			 * delegation type of for READ.
4016 			 */
4017 			rfs4_deleg_state_rele(dsp);
4018 
4019 			return (stat);
4020 		}
4021 		/*
4022 		 * If we got this far, something bad happened
4023 		 */
4024 		return (NFS4ERR_BAD_STATEID);
4025 	}
4026 }
4027 
4028 
4029 /*
4030  * This is a special function in that for the file struct provided the
4031  * server wants to remove/close all current state associated with the
4032  * file.  The prime use of this would be with OP_REMOVE to force the
4033  * release of state and particularly of file locks.
4034  *
4035  * There is an assumption that there is no delegations outstanding on
4036  * this file at this point.  The caller should have waited for those
4037  * to be returned or revoked.
4038  */
4039 void
rfs4_close_all_state(rfs4_file_t * fp)4040 rfs4_close_all_state(rfs4_file_t *fp)
4041 {
4042 	rfs4_state_t *sp;
4043 
4044 	rfs4_dbe_lock(fp->rf_dbe);
4045 
4046 #ifdef DEBUG
4047 	/* only applies when server is handing out delegations */
4048 	if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
4049 		ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
4050 #endif
4051 
4052 	/* No delegations for this file */
4053 	ASSERT(list_is_empty(&fp->rf_delegstatelist));
4054 
4055 	/* Make sure that it can not be found */
4056 	rfs4_dbe_invalidate(fp->rf_dbe);
4057 
4058 	if (fp->rf_vp == NULL) {
4059 		rfs4_dbe_unlock(fp->rf_dbe);
4060 		return;
4061 	}
4062 	rfs4_dbe_unlock(fp->rf_dbe);
4063 
4064 	/*
4065 	 * Hold as writer to prevent other server threads from
4066 	 * processing requests related to the file while all state is
4067 	 * being removed.
4068 	 */
4069 	rw_enter(&fp->rf_file_rwlock, RW_WRITER);
4070 
4071 	/* Remove ALL state from the file */
4072 	while ((sp = rfs4_findstate_by_file(fp)) != NULL) {
4073 		rfs4_state_close(sp, FALSE, FALSE, CRED());
4074 		rfs4_state_rele_nounlock(sp);
4075 	}
4076 
4077 	/*
4078 	 * This is only safe since there are no further references to
4079 	 * the file.
4080 	 */
4081 	rfs4_dbe_lock(fp->rf_dbe);
4082 	if (fp->rf_vp) {
4083 		vnode_t *vp = fp->rf_vp;
4084 
4085 		mutex_enter(&vp->v_vsd_lock);
4086 		(void) vsd_set(vp, nfs4_srv_vkey, NULL);
4087 		mutex_exit(&vp->v_vsd_lock);
4088 		VN_RELE(vp);
4089 		fp->rf_vp = NULL;
4090 	}
4091 	rfs4_dbe_unlock(fp->rf_dbe);
4092 
4093 	/* Finally let other references to proceed */
4094 	rw_exit(&fp->rf_file_rwlock);
4095 }
4096 
4097 /*
4098  * This function is used as a target for the rfs4_dbe_walk() call
4099  * below.  The purpose of this function is to see if the
4100  * lockowner_state refers to a file that resides within the exportinfo
4101  * export.  If so, then remove the lock_owner state (file locks and
4102  * share "locks") for this object since the intent is the server is
4103  * unexporting the specified directory.  Be sure to invalidate the
4104  * object after the state has been released
4105  */
4106 static void
rfs4_lo_state_walk_callout(rfs4_entry_t u_entry,void * e)4107 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
4108 {
4109 	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
4110 	struct exportinfo *exi = (struct exportinfo *)e;
4111 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4112 	fhandle_t *efhp;
4113 
4114 	efhp = (fhandle_t *)&exi->exi_fh;
4115 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4116 
4117 	FH_TO_FMT4(efhp, exi_fhp);
4118 
4119 	finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
4120 	    rf_filehandle.nfs_fh4_val;
4121 
4122 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4123 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4124 	    exi_fhp->fh4_xlen) == 0) {
4125 		rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
4126 		rfs4_dbe_invalidate(lsp->rls_dbe);
4127 		rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
4128 	}
4129 }
4130 
4131 /*
4132  * This function is used as a target for the rfs4_dbe_walk() call
4133  * below.  The purpose of this function is to see if the state refers
4134  * to a file that resides within the exportinfo export.  If so, then
4135  * remove the open state for this object since the intent is the
4136  * server is unexporting the specified directory.  The main result for
4137  * this type of entry is to invalidate it such it will not be found in
4138  * the future.
4139  */
4140 static void
rfs4_state_walk_callout(rfs4_entry_t u_entry,void * e)4141 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4142 {
4143 	rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4144 	struct exportinfo *exi = (struct exportinfo *)e;
4145 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4146 	fhandle_t *efhp;
4147 
4148 	efhp = (fhandle_t *)&exi->exi_fh;
4149 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4150 
4151 	FH_TO_FMT4(efhp, exi_fhp);
4152 
4153 	finfo_fhp =
4154 	    (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4155 
4156 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4157 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4158 	    exi_fhp->fh4_xlen) == 0) {
4159 		rfs4_state_close(sp, TRUE, FALSE, CRED());
4160 		rfs4_dbe_invalidate(sp->rs_dbe);
4161 	}
4162 }
4163 
4164 /*
4165  * This function is used as a target for the rfs4_dbe_walk() call
4166  * below.  The purpose of this function is to see if the state refers
4167  * to a file that resides within the exportinfo export.  If so, then
4168  * remove the deleg state for this object since the intent is the
4169  * server is unexporting the specified directory.  The main result for
4170  * this type of entry is to invalidate it such it will not be found in
4171  * the future.
4172  */
4173 static void
rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry,void * e)4174 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4175 {
4176 	rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4177 	struct exportinfo *exi = (struct exportinfo *)e;
4178 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4179 	fhandle_t *efhp;
4180 
4181 	efhp = (fhandle_t *)&exi->exi_fh;
4182 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4183 
4184 	FH_TO_FMT4(efhp, exi_fhp);
4185 
4186 	finfo_fhp =
4187 	    (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4188 
4189 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4190 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4191 	    exi_fhp->fh4_xlen) == 0) {
4192 		rfs4_dbe_invalidate(dsp->rds_dbe);
4193 	}
4194 }
4195 
4196 /*
4197  * This function is used as a target for the rfs4_dbe_walk() call
4198  * below.  The purpose of this function is to see if the state refers
4199  * to a file that resides within the exportinfo export.  If so, then
4200  * release vnode hold for this object since the intent is the server
4201  * is unexporting the specified directory.  Invalidation will prevent
4202  * this struct from being found in the future.
4203  */
4204 static void
rfs4_file_walk_callout(rfs4_entry_t u_entry,void * e)4205 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4206 {
4207 	rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4208 	struct exportinfo *exi = (struct exportinfo *)e;
4209 	nfs_fh4_fmt_t   fhfmt4, *exi_fhp, *finfo_fhp;
4210 	fhandle_t *efhp;
4211 
4212 	efhp = (fhandle_t *)&exi->exi_fh;
4213 	exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4214 
4215 	FH_TO_FMT4(efhp, exi_fhp);
4216 
4217 	finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4218 
4219 	if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4220 	    bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4221 	    exi_fhp->fh4_xlen) == 0) {
4222 		if (fp->rf_vp) {
4223 			vnode_t *vp = fp->rf_vp;
4224 
4225 			/*
4226 			 * don't leak monitors and remove the reference
4227 			 * put on the vnode when the delegation was granted.
4228 			 */
4229 			if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4230 				(void) fem_uninstall(vp, deleg_rdops,
4231 				    (void *)fp);
4232 				vn_open_downgrade(vp, FREAD);
4233 			} else if (fp->rf_dinfo.rd_dtype ==
4234 			    OPEN_DELEGATE_WRITE) {
4235 				(void) fem_uninstall(vp, deleg_wrops,
4236 				    (void *)fp);
4237 				vn_open_downgrade(vp, FREAD|FWRITE);
4238 			}
4239 			mutex_enter(&vp->v_vsd_lock);
4240 			(void) vsd_set(vp, nfs4_srv_vkey, NULL);
4241 			mutex_exit(&vp->v_vsd_lock);
4242 			VN_RELE(vp);
4243 			fp->rf_vp = NULL;
4244 		}
4245 		rfs4_dbe_invalidate(fp->rf_dbe);
4246 	}
4247 }
4248 
4249 /*
4250  * Given a directory that is being unexported, cleanup/release all
4251  * state in the server that refers to objects residing underneath this
4252  * particular export.  The ordering of the release is important.
4253  * Lock_owner, then state and then file.
4254  *
4255  * NFS zones note: nfs_export.c:unexport() calls this from a
4256  * thread in the global zone for NGZ data structures, so we
4257  * CANNOT use zone_getspecific anywhere in this code path.
4258  */
4259 void
rfs4_clean_state_exi(nfs_export_t * ne,struct exportinfo * exi)4260 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4261 {
4262 	nfs_globals_t *ng;
4263 	nfs4_srv_t *nsrv4;
4264 
4265 	ng = ne->ne_globals;
4266 	ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4267 	nsrv4 = ng->nfs4_srv;
4268 
4269 	mutex_enter(&nsrv4->state_lock);
4270 
4271 	if (nsrv4->nfs4_server_state == NULL) {
4272 		mutex_exit(&nsrv4->state_lock);
4273 		return;
4274 	}
4275 
4276 	rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4277 	    rfs4_lo_state_walk_callout, exi);
4278 	rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4279 	rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4280 	    rfs4_deleg_state_walk_callout, exi);
4281 	rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4282 
4283 	mutex_exit(&nsrv4->state_lock);
4284 }
4285