1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 * Copyright 2019 Nexenta by DDN, Inc.
29 * Copyright 2020 RackTop Systems, Inc.
30 * Copyright 2023 MNX Cloud, Inc.
31 */
32
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/atomic.h>
37 #include <sys/clconf.h>
38 #include <sys/cladm.h>
39 #include <sys/flock.h>
40 #include <nfs/export.h>
41 #include <nfs/nfs.h>
42 #include <nfs/nfs4.h>
43 #include <nfs/nfssys.h>
44 #include <nfs/lm.h>
45 #include <sys/pathname.h>
46 #include <sys/sdt.h>
47 #include <sys/nvpair.h>
48
49 extern u_longlong_t nfs4_srv_caller_id;
50
51 extern uint_t nfs4_srv_vkey;
52
53 stateid4 special0 = {
54 0,
55 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
56 };
57
58 stateid4 special1 = {
59 0xffffffff,
60 {
61 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
62 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
63 (char)0xff, (char)0xff, (char)0xff, (char)0xff
64 }
65 };
66
67
68 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
69 stateid4_cmp(id, &special1))
70
71 /* For embedding the cluster nodeid into our clientid */
72 #define CLUSTER_NODEID_SHIFT 24
73 #define CLUSTER_MAX_NODEID 255
74
75 #ifdef DEBUG
76 int rfs4_debug;
77 #endif
78
79 rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
80 static uint32_t rfs4_database_debug = 0x00;
81
82 /* CSTYLED */
83 static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
84 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
85 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
86 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
87
88 /*
89 * Couple of simple init/destroy functions for a general waiter
90 */
91 void
rfs4_sw_init(rfs4_state_wait_t * swp)92 rfs4_sw_init(rfs4_state_wait_t *swp)
93 {
94 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
95 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
96 swp->sw_active = FALSE;
97 swp->sw_wait_count = 0;
98 }
99
100 void
rfs4_sw_destroy(rfs4_state_wait_t * swp)101 rfs4_sw_destroy(rfs4_state_wait_t *swp)
102 {
103 mutex_destroy(swp->sw_cv_lock);
104 cv_destroy(swp->sw_cv);
105 }
106
107 void
rfs4_sw_enter(rfs4_state_wait_t * swp)108 rfs4_sw_enter(rfs4_state_wait_t *swp)
109 {
110 mutex_enter(swp->sw_cv_lock);
111 while (swp->sw_active) {
112 swp->sw_wait_count++;
113 cv_wait(swp->sw_cv, swp->sw_cv_lock);
114 swp->sw_wait_count--;
115 }
116 ASSERT(swp->sw_active == FALSE);
117 swp->sw_active = TRUE;
118 mutex_exit(swp->sw_cv_lock);
119 }
120
121 void
rfs4_sw_exit(rfs4_state_wait_t * swp)122 rfs4_sw_exit(rfs4_state_wait_t *swp)
123 {
124 mutex_enter(swp->sw_cv_lock);
125 ASSERT(swp->sw_active == TRUE);
126 swp->sw_active = FALSE;
127 if (swp->sw_wait_count != 0)
128 cv_broadcast(swp->sw_cv);
129 mutex_exit(swp->sw_cv_lock);
130 }
131
132 static void
deep_lock_copy(LOCK4res * dres,LOCK4res * sres)133 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
134 {
135 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
136 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
137
138 if (sres->status == NFS4ERR_DENIED) {
139 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
140 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
141 }
142 }
143
144 /*
145 * CPR callback id -- not related to v4 callbacks
146 */
147 static callb_id_t cpr_id = 0;
148
149 static void
deep_lock_free(LOCK4res * res)150 deep_lock_free(LOCK4res *res)
151 {
152 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
153
154 if (res->status == NFS4ERR_DENIED)
155 kmem_free(lo->owner_val, lo->owner_len);
156 }
157
158 static void
deep_open_copy(OPEN4res * dres,OPEN4res * sres)159 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
160 {
161 nfsace4 *sacep, *dacep;
162
163 if (sres->status != NFS4_OK) {
164 return;
165 }
166
167 dres->attrset = sres->attrset;
168
169 switch (sres->delegation.delegation_type) {
170 case OPEN_DELEGATE_NONE:
171 return;
172 case OPEN_DELEGATE_READ:
173 sacep = &sres->delegation.open_delegation4_u.read.permissions;
174 dacep = &dres->delegation.open_delegation4_u.read.permissions;
175 break;
176 case OPEN_DELEGATE_WRITE:
177 sacep = &sres->delegation.open_delegation4_u.write.permissions;
178 dacep = &dres->delegation.open_delegation4_u.write.permissions;
179 break;
180 }
181 dacep->who.utf8string_val =
182 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
183 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
184 sacep->who.utf8string_len);
185 }
186
187 static void
deep_open_free(OPEN4res * res)188 deep_open_free(OPEN4res *res)
189 {
190 nfsace4 *acep;
191 if (res->status != NFS4_OK)
192 return;
193
194 switch (res->delegation.delegation_type) {
195 case OPEN_DELEGATE_NONE:
196 return;
197 case OPEN_DELEGATE_READ:
198 acep = &res->delegation.open_delegation4_u.read.permissions;
199 break;
200 case OPEN_DELEGATE_WRITE:
201 acep = &res->delegation.open_delegation4_u.write.permissions;
202 break;
203 }
204
205 if (acep->who.utf8string_val) {
206 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
207 acep->who.utf8string_val = NULL;
208 }
209 }
210
211 void
rfs4_free_reply(nfs_resop4 * rp)212 rfs4_free_reply(nfs_resop4 *rp)
213 {
214 switch (rp->resop) {
215 case OP_LOCK:
216 deep_lock_free(&rp->nfs_resop4_u.oplock);
217 break;
218 case OP_OPEN:
219 deep_open_free(&rp->nfs_resop4_u.opopen);
220 default:
221 break;
222 }
223 }
224
225 void
rfs4_copy_reply(nfs_resop4 * dst,nfs_resop4 * src)226 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
227 {
228 *dst = *src;
229
230 /* Handle responses that need deep copy */
231 switch (src->resop) {
232 case OP_LOCK:
233 deep_lock_copy(&dst->nfs_resop4_u.oplock,
234 &src->nfs_resop4_u.oplock);
235 break;
236 case OP_OPEN:
237 deep_open_copy(&dst->nfs_resop4_u.opopen,
238 &src->nfs_resop4_u.opopen);
239 break;
240 default:
241 break;
242 };
243 }
244
245 /*
246 * This is the implementation of the underlying state engine. The
247 * public interface to this engine is described by
248 * nfs4_state.h. Callers to the engine should hold no state engine
249 * locks when they call in to it. If the protocol needs to lock data
250 * structures it should do so after acquiring all references to them
251 * first and then follow the following lock order:
252 *
253 * client > openowner > state > lo_state > lockowner > file.
254 *
255 * Internally we only allow a thread to hold one hash bucket lock at a
256 * time and the lock is higher in the lock order (must be acquired
257 * first) than the data structure that is on that hash list.
258 *
259 * If a new reference was acquired by the caller, that reference needs
260 * to be released after releasing all acquired locks with the
261 * corresponding rfs4_*_rele routine.
262 */
263
264 /*
265 * This code is somewhat prototypical for now. Its purpose currently is to
266 * implement the interfaces sufficiently to finish the higher protocol
267 * elements. This will be replaced by dynamically resizable tables
268 * backed by kmem_cache allocator. However synchronization is handled
269 * correctly (I hope) and will not change by much. The mutexes for
270 * the hash buckets that can be used to create new instances of data
271 * structures might be good candidates to evolve into reader writer
272 * locks. If it has to do a creation, it would be holding the
273 * mutex across a kmem_alloc with KM_SLEEP specified.
274 */
275
276 #ifdef DEBUG
277 #define TABSIZE 17
278 #else
279 #define TABSIZE 2047
280 #endif
281
/* Hash a key by casting it to unsigned long and dropping its low 3 bits. */
#define	ADDRHASH(key) ((unsigned long)(key) >> 3)

/*
 * Parenthesized so that the product is evaluated as a unit when the macro
 * is used inside a larger expression (e.g. as a divisor).
 */
#define	MAXTABSZ (1024 * 1024)
285
286 /* The values below are rfs4_lease_time units */
287
288 #ifdef DEBUG
289 #define CLIENT_CACHE_TIME 1
290 #define OPENOWNER_CACHE_TIME 1
291 #define STATE_CACHE_TIME 1
292 #define LO_STATE_CACHE_TIME 1
293 #define LOCKOWNER_CACHE_TIME 1
294 #define FILE_CACHE_TIME 3
295 #define DELEG_STATE_CACHE_TIME 1
296 #else
297 #define CLIENT_CACHE_TIME 10
298 #define OPENOWNER_CACHE_TIME 5
299 #define STATE_CACHE_TIME 1
300 #define LO_STATE_CACHE_TIME 1
301 #define LOCKOWNER_CACHE_TIME 3
302 #define FILE_CACHE_TIME 40
303 #define DELEG_STATE_CACHE_TIME 1
304 #endif
305
306 /*
307 * NFSv4 server state databases
308 *
309 * Initialized when the module is loaded and used by NFSv4 state tables.
310 * These kmem_cache databases are global, the tables that make use of these
311 * are per zone.
312 */
313 kmem_cache_t *rfs4_client_mem_cache;
314 kmem_cache_t *rfs4_clntIP_mem_cache;
315 kmem_cache_t *rfs4_openown_mem_cache;
316 kmem_cache_t *rfs4_openstID_mem_cache;
317 kmem_cache_t *rfs4_lockstID_mem_cache;
318 kmem_cache_t *rfs4_lockown_mem_cache;
319 kmem_cache_t *rfs4_file_mem_cache;
320 kmem_cache_t *rfs4_delegstID_mem_cache;
321 kmem_cache_t *rfs4_session_mem_cache;
322
323 /*
324 * NFSv4 state table functions
325 */
326 static bool_t rfs4_client_create(rfs4_entry_t, void *);
327 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
328 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
329 static void rfs4_client_destroy(rfs4_entry_t);
330 static bool_t rfs4_client_expiry(rfs4_entry_t);
331 static uint32_t clientid_hash(void *);
332 static bool_t clientid_compare(rfs4_entry_t, void *);
333 static void *clientid_mkkey(rfs4_entry_t);
334 static uint32_t nfsclnt_hash(void *);
335 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
336 static void *nfsclnt_mkkey(rfs4_entry_t);
337 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
338 static void rfs4_clntip_destroy(rfs4_entry_t);
339 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
340 static uint32_t clntip_hash(void *);
341 static bool_t clntip_compare(rfs4_entry_t, void *);
342 static void *clntip_mkkey(rfs4_entry_t);
343 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
344 static void rfs4_openowner_destroy(rfs4_entry_t);
345 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
346 static uint32_t openowner_hash(void *);
347 static bool_t openowner_compare(rfs4_entry_t, void *);
348 static void *openowner_mkkey(rfs4_entry_t);
349 static bool_t rfs4_state_create(rfs4_entry_t, void *);
350 static void rfs4_state_destroy(rfs4_entry_t);
351 static bool_t rfs4_state_expiry(rfs4_entry_t);
352 static uint32_t state_hash(void *);
353 static bool_t state_compare(rfs4_entry_t, void *);
354 static void *state_mkkey(rfs4_entry_t);
355 static uint32_t state_owner_file_hash(void *);
356 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
357 static void *state_owner_file_mkkey(rfs4_entry_t);
358 static uint32_t state_file_hash(void *);
359 static bool_t state_file_compare(rfs4_entry_t, void *);
360 static void *state_file_mkkey(rfs4_entry_t);
361 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
362 static void rfs4_lo_state_destroy(rfs4_entry_t);
363 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
364 static uint32_t lo_state_hash(void *);
365 static bool_t lo_state_compare(rfs4_entry_t, void *);
366 static void *lo_state_mkkey(rfs4_entry_t);
367 static uint32_t lo_state_lo_hash(void *);
368 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
369 static void *lo_state_lo_mkkey(rfs4_entry_t);
370 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
371 static void rfs4_lockowner_destroy(rfs4_entry_t);
372 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
373 static uint32_t lockowner_hash(void *);
374 static bool_t lockowner_compare(rfs4_entry_t, void *);
375 static void *lockowner_mkkey(rfs4_entry_t);
376 static uint32_t pid_hash(void *);
377 static bool_t pid_compare(rfs4_entry_t, void *);
378 static void *pid_mkkey(rfs4_entry_t);
379 static bool_t rfs4_file_create(rfs4_entry_t, void *);
380 static void rfs4_file_destroy(rfs4_entry_t);
381 static uint32_t file_hash(void *);
382 static bool_t file_compare(rfs4_entry_t, void *);
383 static void *file_mkkey(rfs4_entry_t);
384 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
385 static void rfs4_deleg_state_destroy(rfs4_entry_t);
386 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
387 static uint32_t deleg_hash(void *);
388 static bool_t deleg_compare(rfs4_entry_t, void *);
389 static void *deleg_mkkey(rfs4_entry_t);
390 static uint32_t deleg_state_hash(void *);
391 static bool_t deleg_state_compare(rfs4_entry_t, void *);
392 static void *deleg_state_mkkey(rfs4_entry_t);
393
394 static void rfs4_state_rele_nounlock(rfs4_state_t *);
395
396 static int rfs4_ss_enabled = 0;
397
398 void
rfs4_ss_pnfree(rfs4_ss_pn_t * ss_pn)399 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
400 {
401 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
402 }
403
404 static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char * dir,char * leaf)405 rfs4_ss_pnalloc(char *dir, char *leaf)
406 {
407 rfs4_ss_pn_t *ss_pn;
408 int dir_len, leaf_len;
409
410 /*
411 * validate we have a resonable path
412 * (account for the '/' and trailing null)
413 */
414 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
415 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
416 (dir_len + leaf_len + 2) > MAXPATHLEN) {
417 return (NULL);
418 }
419
420 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
421
422 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
423 /* Handy pointer to just the leaf name */
424 ss_pn->leaf = ss_pn->pn + dir_len + 1;
425 return (ss_pn);
426 }
427
428
429 /*
430 * Move the "leaf" filename from "sdir" directory
431 * to the "ddir" directory. Return the pathname of
432 * the destination unless the rename fails in which
433 * case we need to return the source pathname.
434 */
435 static rfs4_ss_pn_t *
rfs4_ss_movestate(char * sdir,char * ddir,char * leaf)436 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
437 {
438 rfs4_ss_pn_t *src, *dst;
439
440 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
441 return (NULL);
442
443 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
444 rfs4_ss_pnfree(src);
445 return (NULL);
446 }
447
448 /*
449 * If the rename fails we shall return the src
450 * pathname and free the dst. Otherwise we need
451 * to free the src and return the dst pathanme.
452 */
453 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
454 rfs4_ss_pnfree(dst);
455 return (src);
456 }
457 rfs4_ss_pnfree(src);
458 return (dst);
459 }
460
461
462 static rfs4_oldstate_t *
rfs4_ss_getstate(vnode_t * dvp,rfs4_ss_pn_t * ss_pn)463 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
464 {
465 struct uio uio;
466 struct iovec iov[3];
467
468 rfs4_oldstate_t *cl_ss = NULL;
469 vnode_t *vp;
470 vattr_t va;
471 uint_t id_len;
472 int err, kill_file, file_vers;
473
474 if (ss_pn == NULL)
475 return (NULL);
476
477 /*
478 * open the state file.
479 */
480 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
481 return (NULL);
482 }
483
484 if (vp->v_type != VREG) {
485 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
486 VN_RELE(vp);
487 return (NULL);
488 }
489
490 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
491 if (err) {
492 /*
493 * We don't have read access? better get the heck out.
494 */
495 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
496 VN_RELE(vp);
497 return (NULL);
498 }
499
500 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
501 /*
502 * get the file size to do some basic validation
503 */
504 va.va_mask = AT_SIZE;
505 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
506
507 kill_file = (va.va_size == 0 || va.va_size <
508 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
509
510 if (err || kill_file) {
511 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
512 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
513 VN_RELE(vp);
514 if (kill_file) {
515 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
516 }
517 return (NULL);
518 }
519
520 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
521
522 /*
523 * build iovecs to read in the file_version, verifier and id_len
524 */
525 iov[0].iov_base = (caddr_t)&file_vers;
526 iov[0].iov_len = sizeof (int);
527 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
528 iov[1].iov_len = NFS4_VERIFIER_SIZE;
529 iov[2].iov_base = (caddr_t)&id_len;
530 iov[2].iov_len = sizeof (uint_t);
531
532 uio.uio_iov = iov;
533 uio.uio_iovcnt = 3;
534 uio.uio_segflg = UIO_SYSSPACE;
535 uio.uio_loffset = 0;
536 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
537
538 err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
539 if (err != 0) {
540 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
541 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
542 VN_RELE(vp);
543 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
544 return (NULL);
545 }
546
547 /*
548 * if the file_version doesn't match or if the
549 * id_len is zero or the combination of the verifier,
550 * id_len and id_val is bigger than the file we have
551 * a problem. If so ditch the file.
552 */
553 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
554 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
555
556 if (err || kill_file) {
557 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
558 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
559 VN_RELE(vp);
560 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
561 if (kill_file) {
562 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
563 }
564 return (NULL);
565 }
566
567 /*
568 * now get the client id value
569 */
570 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
571 iov[0].iov_base = cl_ss->cl_id4.id_val;
572 iov[0].iov_len = id_len;
573
574 uio.uio_iov = iov;
575 uio.uio_iovcnt = 1;
576 uio.uio_segflg = UIO_SYSSPACE;
577 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
578
579 err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
580 if (err != 0) {
581 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
582 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
583 VN_RELE(vp);
584 kmem_free(cl_ss->cl_id4.id_val, id_len);
585 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
586 return (NULL);
587 }
588
589 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
590 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
591 VN_RELE(vp);
592 return (cl_ss);
593 }
594
595 #ifdef nextdp
596 #undef nextdp
597 #endif
598 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
599
600 /*
601 * Check whether list already contains the client
602 * This protects against counting the same client twice.
603 */
604 static bool_t
rfs4_ss_has_client(rfs4_oldstate_t * head,nfs_client_id4 * client)605 rfs4_ss_has_client(rfs4_oldstate_t *head, nfs_client_id4 *client)
606 {
607 rfs4_oldstate_t *p;
608
609 for (p = head->next; p != head; p = p->next) {
610 nfs_client_id4 *m = &p->cl_id4;
611
612 if (m->id_len != client->id_len)
613 continue;
614
615 if (bcmp(m->id_val, client->id_val, client->id_len) == 0)
616 continue;
617
618 /* client ids match */
619 return (TRUE);
620 }
621
622 return (FALSE);
623 }
624
625 /*
626 * Add entries from statedir to supplied oldstate list.
627 * Optionally, move all entries from statedir -> destdir.
628 */
629 static void
rfs4_ss_oldstate(rfs4_oldstate_t * oldstate,char * statedir,char * destdir)630 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
631 {
632 rfs4_ss_pn_t *ss_pn;
633 rfs4_oldstate_t *cl_ss = NULL;
634 char *dirt = NULL;
635 int err, dir_eof = 0, size = 0;
636 vnode_t *dvp;
637 struct iovec iov;
638 struct uio uio;
639 struct dirent64 *dep;
640 offset_t dirchunk_offset = 0;
641 unsigned int nclients = 0;
642
643 /*
644 * open the state directory
645 */
646 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
647 return;
648
649 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
650 goto out;
651
652 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
653
654 /*
655 * Get and process the directory entries
656 */
657 while (!dir_eof) {
658 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
659 iov.iov_base = dirt;
660 iov.iov_len = RFS4_SS_DIRSIZE;
661 uio.uio_iov = &iov;
662 uio.uio_iovcnt = 1;
663 uio.uio_segflg = UIO_SYSSPACE;
664 uio.uio_loffset = dirchunk_offset;
665 uio.uio_resid = RFS4_SS_DIRSIZE;
666
667 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
668 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
669 if (err)
670 goto out;
671
672 size = RFS4_SS_DIRSIZE - uio.uio_resid;
673
674 /*
675 * Process all the directory entries in this
676 * readdir chunk
677 */
678 for (dep = (struct dirent64 *)dirt; size > 0;
679 dep = nextdp(dep)) {
680
681 size -= dep->d_reclen;
682 dirchunk_offset = dep->d_off;
683
684 /*
685 * Skip '.' and '..'
686 */
687 if (NFS_IS_DOTNAME(dep->d_name))
688 continue;
689
690 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
691 if (ss_pn == NULL)
692 continue;
693
694 cl_ss = rfs4_ss_getstate(dvp, ss_pn);
695 if (cl_ss != NULL) {
696 if (destdir != NULL) {
697 rfs4_ss_pnfree(ss_pn);
698 cl_ss->ss_pn = rfs4_ss_movestate(
699 statedir, destdir, dep->d_name);
700 } else {
701 cl_ss->ss_pn = ss_pn;
702 }
703
704 if (!rfs4_ss_has_client(oldstate,
705 &cl_ss->cl_id4))
706 nclients++;
707
708 insque(cl_ss, oldstate);
709 } else {
710 rfs4_ss_pnfree(ss_pn);
711 }
712 }
713 }
714
715 out:
716 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
717 VN_RELE(dvp);
718 if (dirt)
719 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
720
721 if (nclients > 0) {
722 nfs4_srv_t *nsrv4 = nfs4_get_srv();
723
724 atomic_add_32(&(nsrv4->nfs4_cur_servinst->nreclaim), nclients);
725 }
726 }
727
728 static void
rfs4_ss_init(nfs4_srv_t * nsrv4)729 rfs4_ss_init(nfs4_srv_t *nsrv4)
730 {
731 int npaths = 1;
732 char *default_dss_path = NFS4_DSS_VAR_DIR;
733
734 /* read the default stable storage state */
735 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
736
737 rfs4_ss_enabled = 1;
738 }
739
740 static void
rfs4_ss_fini(nfs4_srv_t * nsrv4)741 rfs4_ss_fini(nfs4_srv_t *nsrv4)
742 {
743 rfs4_servinst_t *sip;
744
745 mutex_enter(&nsrv4->servinst_lock);
746 sip = nsrv4->nfs4_cur_servinst;
747 while (sip != NULL) {
748 rfs4_dss_clear_oldstate(sip);
749 sip = sip->next;
750 }
751 mutex_exit(&nsrv4->servinst_lock);
752 }
753
754 /*
755 * Remove all oldstate files referenced by this servinst.
756 */
757 static void
rfs4_dss_clear_oldstate(rfs4_servinst_t * sip)758 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
759 {
760 rfs4_oldstate_t *os_head, *osp;
761
762 rw_enter(&sip->oldstate_lock, RW_WRITER);
763 os_head = sip->oldstate;
764
765 if (os_head == NULL) {
766 rw_exit(&sip->oldstate_lock);
767 return;
768 }
769
770 /* skip dummy entry */
771 osp = os_head->next;
772 while (osp != os_head) {
773 char *leaf = osp->ss_pn->leaf;
774 rfs4_oldstate_t *os_next;
775
776 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
777
778 if (osp->cl_id4.id_val)
779 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
780 rfs4_ss_pnfree(osp->ss_pn);
781
782 os_next = osp->next;
783 remque(osp);
784 kmem_free(osp, sizeof (rfs4_oldstate_t));
785 osp = os_next;
786 }
787
788 rw_exit(&sip->oldstate_lock);
789 }
790
791 /*
792 * Form the state and oldstate paths, and read in the stable storage files.
793 */
794 void
rfs4_dss_readstate(nfs4_srv_t * nsrv4,int npaths,char ** paths)795 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
796 {
797 int i;
798 char *state, *oldstate;
799
800 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
801 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
802
803 for (i = 0; i < npaths; i++) {
804 char *path = paths[i];
805
806 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
807 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
808
809 /*
810 * Populate the current server instance's oldstate list.
811 *
812 * 1. Read stable storage data from old state directory,
813 * leaving its contents alone.
814 *
815 * 2. Read stable storage data from state directory,
816 * and move the latter's contents to old state
817 * directory.
818 */
819 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
820 oldstate, NULL);
821 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
822 state, oldstate);
823 }
824
825 kmem_free(state, MAXPATHLEN);
826 kmem_free(oldstate, MAXPATHLEN);
827 }
828
829
830 /*
831 * Check if we are still in grace and if the client can be
832 * granted permission to perform reclaims.
833 */
834 void
rfs4_ss_chkclid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)835 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
836 {
837 rfs4_servinst_t *sip;
838
839 /*
840 * It should be sufficient to check the oldstate data for just
841 * this client's instance. However, since our per-instance
842 * client grouping is solely temporal, HA-NFSv4 RG failover
843 * might result in clients of the same RG being partitioned into
844 * separate instances.
845 *
846 * Until the client grouping is improved, we must check the
847 * oldstate data for all instances with an active grace period.
848 *
849 * This also serves as the mechanism to remove stale oldstate data.
850 * The first time we check an instance after its grace period has
851 * expired, the oldstate data should be cleared.
852 *
853 * Start at the current instance, and walk the list backwards
854 * to the first.
855 */
856 mutex_enter(&nsrv4->servinst_lock);
857 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
858 rfs4_ss_chkclid_sip(cp, sip);
859
860 /* if the above check found this client, we're done */
861 if (cp->rc_can_reclaim)
862 break;
863 }
864 mutex_exit(&nsrv4->servinst_lock);
865 }
866
867 static void
rfs4_ss_chkclid_sip(rfs4_client_t * cp,rfs4_servinst_t * sip)868 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
869 {
870 rfs4_oldstate_t *osp, *os_head;
871
872 /* short circuit everything if this server instance has no oldstate */
873 rw_enter(&sip->oldstate_lock, RW_READER);
874 os_head = sip->oldstate;
875 rw_exit(&sip->oldstate_lock);
876 if (os_head == NULL)
877 return;
878
879 /*
880 * If this server instance is no longer in a grace period then
881 * the client won't be able to reclaim. No further need for this
882 * instance's oldstate data, so it can be cleared.
883 */
884 if (!rfs4_servinst_in_grace(sip))
885 return;
886
887 /* this instance is still in grace; search for the clientid */
888
889 rw_enter(&sip->oldstate_lock, RW_READER);
890
891 os_head = sip->oldstate;
892 /* skip dummy entry */
893 osp = os_head->next;
894 while (osp != os_head) {
895 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
896 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
897 osp->cl_id4.id_len) == 0) {
898 cp->rc_can_reclaim = 1;
899 break;
900 }
901 }
902 osp = osp->next;
903 }
904
905 rw_exit(&sip->oldstate_lock);
906 }
907
908 /*
909 * Place client information into stable storage: 1/3.
910 * First, generate the leaf filename, from the client's IP address and
911 * the server-generated short-hand clientid.
912 */
913 void
rfs4_ss_clid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)914 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
915 {
916 const char *kinet_ntop6(uchar_t *, char *, size_t);
917 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
918 struct sockaddr *ca;
919 uchar_t *b;
920
921 if (rfs4_ss_enabled == 0) {
922 return;
923 }
924
925 buf[0] = 0;
926
927 ca = (struct sockaddr *)&cp->rc_addr;
928
929 /*
930 * Convert the caller's IP address to a dotted string
931 */
932 if (ca->sa_family == AF_INET) {
933 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
934 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
935 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
936 } else if (ca->sa_family == AF_INET6) {
937 struct sockaddr_in6 *sin6;
938
939 sin6 = (struct sockaddr_in6 *)ca;
940 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
941 buf, INET6_ADDRSTRLEN);
942 }
943
944 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
945 (longlong_t)cp->rc_clientid);
946 rfs4_ss_clid_write(nsrv4, cp, leaf);
947 }
948
949 /*
950 * Place client information into stable storage: 2/3.
951 * DSS: distributed stable storage: the file may need to be written to
952 * multiple directories.
953 */
954 static void
rfs4_ss_clid_write(nfs4_srv_t * nsrv4,rfs4_client_t * cp,char * leaf)955 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
956 {
957 rfs4_servinst_t *sip;
958
959 /*
960 * It should be sufficient to write the leaf file to (all) DSS paths
961 * associated with just this client's instance. However, since our
962 * per-instance client grouping is solely temporal, HA-NFSv4 RG
963 * failover might result in us losing DSS data.
964 *
965 * Until the client grouping is improved, we must write the DSS data
966 * to all instances' paths. Start at the current instance, and
967 * walk the list backwards to the first.
968 */
969 mutex_enter(&nsrv4->servinst_lock);
970 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
971 int i, npaths = sip->dss_npaths;
972
973 /* write the leaf file to all DSS paths */
974 for (i = 0; i < npaths; i++) {
975 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
976
977 /* HA-NFSv4 path might have been failed-away from us */
978 if (dss_path == NULL)
979 continue;
980
981 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
982 }
983 }
984 mutex_exit(&nsrv4->servinst_lock);
985 }
986
987 /*
988 * Place client information into stable storage: 3/3.
989 * Write the stable storage data to the requested file.
990 */
991 static void
rfs4_ss_clid_write_one(rfs4_client_t * cp,char * dss_path,char * leaf)992 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
993 {
994 int ioflag;
995 int file_vers = NFS4_SS_VERSION;
996 size_t dirlen;
997 struct uio uio;
998 struct iovec iov[4];
999 char *dir;
1000 rfs4_ss_pn_t *ss_pn;
1001 vnode_t *vp;
1002 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
1003
1004 /* allow 2 extra bytes for '/' & NUL */
1005 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
1006 dir = kmem_alloc(dirlen, KM_SLEEP);
1007 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
1008
1009 ss_pn = rfs4_ss_pnalloc(dir, leaf);
1010 /* rfs4_ss_pnalloc takes its own copy */
1011 kmem_free(dir, dirlen);
1012 if (ss_pn == NULL)
1013 return;
1014
1015 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
1016 CRCREAT, 0)) {
1017 rfs4_ss_pnfree(ss_pn);
1018 return;
1019 }
1020
1021 /*
1022 * We need to record leaf - i.e. the filename - so that we know
1023 * what to remove, in the future. However, the dir part of cp->ss_pn
1024 * should never be referenced directly, since it's potentially only
1025 * one of several paths with this leaf in it.
1026 */
1027 if (cp->rc_ss_pn != NULL) {
1028 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
1029 /* we've already recorded *this* leaf */
1030 rfs4_ss_pnfree(ss_pn);
1031 } else {
1032 /* replace with this leaf */
1033 rfs4_ss_pnfree(cp->rc_ss_pn);
1034 cp->rc_ss_pn = ss_pn;
1035 }
1036 } else {
1037 cp->rc_ss_pn = ss_pn;
1038 }
1039
1040 /*
1041 * Build a scatter list that points to the nfs_client_id4
1042 */
1043 iov[0].iov_base = (caddr_t)&file_vers;
1044 iov[0].iov_len = sizeof (int);
1045 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1046 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1047 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1048 iov[2].iov_len = sizeof (uint_t);
1049 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1050 iov[3].iov_len = cl_id4->id_len;
1051
1052 uio.uio_iov = iov;
1053 uio.uio_iovcnt = 4;
1054 uio.uio_loffset = 0;
1055 uio.uio_segflg = UIO_SYSSPACE;
1056 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1057 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1058 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1059
1060 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1061 uio.uio_extflg = UIO_COPY_DEFAULT;
1062
1063 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1064 /* write the full client id to the file. */
1065 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1066 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1067
1068 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1069 VN_RELE(vp);
1070 }
1071
1072 /*
1073 * DSS: distributed stable storage.
1074 * Unpack the list of paths passed by nfsd.
1075 * Use nvlist_alloc(9F) to manage the data.
1076 * The caller is responsible for allocating and freeing the buffer.
1077 */
1078 int
rfs4_dss_setpaths(char * buf,size_t buflen)1079 rfs4_dss_setpaths(char *buf, size_t buflen)
1080 {
1081 int error;
1082
1083 /*
1084 * If this is a "warm start", i.e. we previously had DSS paths,
1085 * preserve the old paths.
1086 */
1087 if (rfs4_dss_paths != NULL) {
1088 /*
1089 * Before we lose the ptr, destroy the nvlist and pathnames
1090 * array from the warm start before this one.
1091 */
1092 nvlist_free(rfs4_dss_oldpaths);
1093 rfs4_dss_oldpaths = rfs4_dss_paths;
1094 }
1095
1096 /* unpack the buffer into a searchable nvlist */
1097 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1098 if (error)
1099 return (error);
1100
1101 /*
1102 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1103 * in the list, and record its location.
1104 */
1105 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1106 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1107 return (error);
1108 }
1109
1110 /*
1111 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1112 * to find and mark the client for forced expire.
1113 */
1114 static void
rfs4_client_scrub(rfs4_entry_t ent,void * arg)1115 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1116 {
1117 rfs4_client_t *cp = (rfs4_client_t *)ent;
1118 struct nfs4clrst_args *clr = arg;
1119 struct sockaddr_in6 *ent_sin6;
1120 struct in6_addr clr_in6;
1121 struct sockaddr_in *ent_sin;
1122 struct in_addr clr_in;
1123
1124 if (clr->addr_type != cp->rc_addr.ss_family) {
1125 return;
1126 }
1127
1128 switch (clr->addr_type) {
1129
1130 case AF_INET6:
1131 /* copyin the address from user space */
1132 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1133 break;
1134 }
1135
1136 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1137
1138 /*
1139 * now compare, and if equivalent mark entry
1140 * for forced expiration
1141 */
1142 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1143 cp->rc_forced_expire = 1;
1144 }
1145 break;
1146
1147 case AF_INET:
1148 /* copyin the address from user space */
1149 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1150 break;
1151 }
1152
1153 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1154
1155 /*
1156 * now compare, and if equivalent mark entry
1157 * for forced expiration
1158 */
1159 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1160 cp->rc_forced_expire = 1;
1161 }
1162 break;
1163
1164 default:
1165 /* force this assert to fail */
1166 ASSERT(clr->addr_type != clr->addr_type);
1167 }
1168 }
1169
1170 /*
1171 * This is called from nfssys() in order to clear server state
1172 * for the specified client IP Address.
1173 */
1174 int
rfs4_clear_client_state(struct nfs4clrst_args * clr)1175 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1176 {
1177 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1178 int rc;
1179
1180 /* Once nfssrv is loaded, every zone should have one of these. */
1181 VERIFY(nsrv4 != NULL);
1182
1183 mutex_enter(&nsrv4->state_lock);
1184 /*
1185 * But only after NFS service is running is the nfs4_server_state
1186 * around. It's dirty (and needs the state_lock held), but all of the
1187 * databases live deep in the nfs4_server_state, so it's the only thing
1188 * to legitimately check prior to using anything. The pointers
1189 * themselves may be stale.
1190 */
1191 if (nsrv4->nfs4_server_state != NULL) {
1192 VERIFY(nsrv4->rfs4_client_tab != NULL);
1193 rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1194 rc = 0;
1195 } else {
1196 rc = ENXIO;
1197 }
1198 mutex_exit(&nsrv4->state_lock);
1199 return (rc);
1200 }
1201
1202 /*
1203 * Used to initialize the NFSv4 server's state or database. All of
1204 * the tables are created and timers are set.
1205 */
1206 void
rfs4_state_g_init(void)1207 rfs4_state_g_init(void)
1208 {
1209 extern boolean_t rfs4_cpr_callb(void *, int);
1210 /*
1211 * Add a CPR callback so that we can update client
1212 * access times to extend the lease after a suspend
1213 * and resume (using the same class as rpcmod/connmgr)
1214 */
1215 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1216
1217 /*
1218 * NFSv4 server state databases
1219 *
1220 * Initialized when the module is loaded and used by NFSv4 state
1221 * tables. These kmem_cache free pools are used globally, the NFSv4
1222 * state tables which make use of these kmem_cache free pools are per
1223 * zone.
1224 *
1225 * initialize the global kmem_cache free pools which will be used by
1226 * the NFSv4 state tables.
1227 */
1228 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache",
1229 2, sizeof (rfs4_client_t), 0);
1230 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache",
1231 1, sizeof (rfs4_clntip_t), 1);
1232 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache",
1233 1, sizeof (rfs4_openowner_t), 2);
1234 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache",
1235 3, sizeof (rfs4_state_t), 3);
1236 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache",
1237 3, sizeof (rfs4_lo_state_t), 4);
1238 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache",
1239 2, sizeof (rfs4_lockowner_t), 5);
1240 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache",
1241 1, sizeof (rfs4_file_t), 6);
1242 rfs4_delegstID_mem_cache =
1243 nfs4_init_mem_cache("DelegStateID_entry_cache", 2,
1244 sizeof (rfs4_deleg_state_t), 7);
1245 rfs4_session_mem_cache = nfs4_init_mem_cache("Session_entry_cache",
1246 1, sizeof (rfs4_session_t), 8);
1247 }
1248
1249
1250 /*
1251 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1252 * and other state.
1253 */
1254 void
rfs4_state_g_fini(void)1255 rfs4_state_g_fini(void)
1256 {
1257 int i;
1258 /*
1259 * Cleanup the CPR callback.
1260 */
1261 if (cpr_id)
1262 (void) callb_delete(cpr_id);
1263
1264 /* free the NFSv4 state databases */
1265 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1266 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1267 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1268 }
1269
1270 rfs4_client_mem_cache = NULL;
1271 rfs4_clntIP_mem_cache = NULL;
1272 rfs4_openown_mem_cache = NULL;
1273 rfs4_openstID_mem_cache = NULL;
1274 rfs4_lockstID_mem_cache = NULL;
1275 rfs4_lockown_mem_cache = NULL;
1276 rfs4_file_mem_cache = NULL;
1277 rfs4_delegstID_mem_cache = NULL;
1278 rfs4_session_mem_cache = NULL;
1279
1280 /* DSS: distributed stable storage */
1281 nvlist_free(rfs4_dss_oldpaths);
1282 nvlist_free(rfs4_dss_paths);
1283 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1284 }
1285
/*
 * Used to initialize the per zone NFSv4 server's state.
 *
 * In order, this: resets the DSS path list, advances the zone's start
 * time, creates a server instance (including any extra DSS paths when
 * running in the global zone), and then, under state_lock and only if the
 * state database does not already exist, creates the database together
 * with every state table and its indices.  Stable storage is initialized
 * last, after state_lock has been dropped.
 */
void
rfs4_state_zone_init(nfs4_srv_t *nsrv4)
{
	time_t start_time;
	int start_grace;
	char *dss_path = NFS4_DSS_VAR_DIR;

	/* DSS: distributed stable storage: initialise served paths list */
	nsrv4->dss_pathlist = NULL;

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (nsrv4->rfs4_start_time < start_time)
		nsrv4->rfs4_start_time = start_time;
	else
		nsrv4->rfs4_start_time++;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
		int i;
		char **dss_allpaths = NULL;
		dss_allpaths = kmem_alloc(sizeof (char *) *
		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
		/*
		 * Add the default path into the list of paths for saving
		 * state information.
		 */
		dss_allpaths[0] = dss_path;
		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
		}
		rfs4_servinst_create(nsrv4, start_grace,
		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
		/* only the temporary pointer array is freed here */
		kmem_free(dss_allpaths,
		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
	} else {
		/* just the default path */
		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
	}

	/* reset the "first NFSv4 request" status */
	nsrv4->seen_first_compound = 0;

	mutex_enter(&nsrv4->state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (nsrv4->nfs4_server_state != NULL) {
		mutex_exit(&nsrv4->state_lock);
		return;
	}

	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * set the various cache timers for table creation; a value of 0
	 * means "not set", so the default is applied
	 */
	if (nsrv4->rfs4_client_cache_time == 0)
		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (nsrv4->rfs4_openowner_cache_time == 0)
		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (nsrv4->rfs4_state_cache_time == 0)
		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
	if (nsrv4->rfs4_lo_state_cache_time == 0)
		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (nsrv4->rfs4_lockowner_cache_time == 0)
		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (nsrv4->rfs4_file_cache_time == 0)
		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
	if (nsrv4->rfs4_deleg_state_cache_time == 0)
		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/*
	 * Now create the individual tables.  Each cache time is scaled by
	 * rfs4_lease_time before being handed to rfs4_table_create().
	 * NOTE(review): the small integer passed after the cache time
	 * matches the number of rfs4_index_create() calls made for that
	 * table below; presumably it is the index count - confirm against
	 * rfs4_table_create().
	 */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Client",
	    nsrv4->rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/* Client IP table: fixed cache time, not scaled by the lease */
	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "ClntIP",
	    nsrv4->rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table */
	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenOwner",
	    nsrv4->rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state table: indexed by owner+file, by state id and by file */
	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenStateID",
	    nsrv4->rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state table: indexed by lock owner + state and by state id */
	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "LockStateID",
	    nsrv4->rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table: indexed by lock_owner4 and by pid */
	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;

	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Lockowner",
	    nsrv4->rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table: unlike the other tables it is created with no expiry
	 * callback (NULL) and with -1 as its final argument.
	 */
	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "File",
	    nsrv4->rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table: indexed by file/client and by state */
	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
	/* CSTYLED */
	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "DelegStateID",
	    nsrv4->rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	/* called with state_lock still held, as its name indicates */
	rfs4x_state_init_locked(nsrv4);

	mutex_exit(&nsrv4->state_lock);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init(nsrv4);
}
1553
/*
 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
 * and state.
 *
 * Operates on the nfs4_srv_t returned by nfs4_get_srv().  Delegations are
 * disabled and stable storage is cleaned up unconditionally; the rest of
 * the teardown happens under state_lock and only when a state database
 * exists.
 */
void
rfs4_state_zone_fini(void)
{
	rfs4_database_t *dbp;
	nfs4_srv_t *nsrv4;
	nsrv4 = nfs4_get_srv();

	/* stop handing out delegations */
	rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);

	/*
	 * Clean up any dangling stable storage structures BEFORE calling
	 * rfs4_servinst_destroy_all() so there are no dangling structures
	 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
	 * freed).
	 */
	rfs4_ss_fini(nsrv4);

	mutex_enter(&nsrv4->state_lock);

	/* nothing more to tear down if the database was never created */
	if (nsrv4->nfs4_server_state == NULL) {
		mutex_exit(&nsrv4->state_lock);
		return;
	}

	rfs4x_state_fini(nsrv4);

	/* destroy server instances and current instance ptr */
	rfs4_servinst_destroy_all(nsrv4);

	/* reset the "first NFSv4 request" status */
	nsrv4->seen_first_compound = 0;

	/*
	 * Detach the database from the zone structure before destroying
	 * it; other code checks nfs4_server_state against NULL under
	 * state_lock to decide whether the tables may be used.
	 */
	dbp = nsrv4->nfs4_server_state;
	nsrv4->nfs4_server_state = NULL;

	rw_destroy(&nsrv4->rfs4_findclient_lock);

	/* First stop all of the reaper threads in the database */
	rfs4_database_shutdown(dbp);

	/*
	 * WARNING: There may be consumers of the rfs4 database still
	 * active as we destroy these.  IF that's the case, consider putting
	 * some of their _zone_fini()-like functions into the zsd key as
	 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions.  We can
	 * maintain some ordering guarantees better that way.
	 */
	/* Now destroy/release the database tables */
	rfs4_database_destroy(dbp);

	/*
	 * Reset the cache timers for next time; rfs4_state_zone_init()
	 * treats 0 as "apply the default".
	 */
	nsrv4->rfs4_client_cache_time = 0;
	nsrv4->rfs4_openowner_cache_time = 0;
	nsrv4->rfs4_state_cache_time = 0;
	nsrv4->rfs4_lo_state_cache_time = 0;
	nsrv4->rfs4_lockowner_cache_time = 0;
	nsrv4->rfs4_file_cache_time = 0;
	nsrv4->rfs4_deleg_state_cache_time = 0;

	mutex_exit(&nsrv4->state_lock);
}
1619
/*
 * Overlay giving access to the two 32-bit halves this implementation
 * stores inside a clientid4: the server start time and a per-client id
 * (filled in by rfs4_client_create()).
 */
typedef union {
	struct {
		uint32_t start_time;	/* server instance start time */
		uint32_t c_id;		/* database entry id of the client */
	} impl_id;
	clientid4 id4;			/* the same bits as a clientid4 */
} cid;
1627
/*
 * Forward declarations of file-local helpers used when booted as a
 * cluster node.  Their definitions are not visible in this part of the
 * file.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);
1631
/*
 * Overlay giving access to the two 32-bit fields this implementation
 * stores inside the SETCLIENTID_CONFIRM verifier: the client's id and a
 * generation number (see rfs4_client_create() and rfs4_client_scv_next()).
 */
typedef union {
	struct {
		uint32_t c_id;		/* copy of the clientid's c_id */
		uint32_t gen_num;	/* bumped by rfs4_client_scv_next() */
	} cv_impl;
	verifier4 confirm_verf;		/* the same bits as a verifier4 */
} scid_confirm_verf;
1639
1640 static uint32_t
clientid_hash(void * key)1641 clientid_hash(void *key)
1642 {
1643 cid *idp = key;
1644
1645 return (idp->impl_id.c_id);
1646 }
1647
1648 static bool_t
clientid_compare(rfs4_entry_t entry,void * key)1649 clientid_compare(rfs4_entry_t entry, void *key)
1650 {
1651 rfs4_client_t *cp = (rfs4_client_t *)entry;
1652 clientid4 *idp = key;
1653
1654 return (*idp == cp->rc_clientid);
1655 }
1656
1657 static void *
clientid_mkkey(rfs4_entry_t entry)1658 clientid_mkkey(rfs4_entry_t entry)
1659 {
1660 rfs4_client_t *cp = (rfs4_client_t *)entry;
1661
1662 return (&cp->rc_clientid);
1663 }
1664
1665 static uint32_t
nfsclnt_hash(void * key)1666 nfsclnt_hash(void *key)
1667 {
1668 nfs_client_id4 *client = key;
1669 int i;
1670 uint32_t hash = 0;
1671
1672 for (i = 0; i < client->id_len; i++) {
1673 hash <<= 1;
1674 hash += (uint_t)client->id_val[i];
1675 }
1676 return (hash);
1677 }
1678
1679
1680 static bool_t
nfsclnt_compare(rfs4_entry_t entry,void * key)1681 nfsclnt_compare(rfs4_entry_t entry, void *key)
1682 {
1683 rfs4_client_t *cp = (rfs4_client_t *)entry;
1684 nfs_client_id4 *nfs_client = key;
1685
1686 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1687 return (FALSE);
1688
1689 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1690 nfs_client->id_len) == 0);
1691 }
1692
1693 static void *
nfsclnt_mkkey(rfs4_entry_t entry)1694 nfsclnt_mkkey(rfs4_entry_t entry)
1695 {
1696 rfs4_client_t *cp = (rfs4_client_t *)entry;
1697
1698 return (&cp->rc_nfs_client);
1699 }
1700
1701 static bool_t
rfs4_client_expiry(rfs4_entry_t u_entry)1702 rfs4_client_expiry(rfs4_entry_t u_entry)
1703 {
1704 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1705 bool_t cp_expired;
1706
1707 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1708 cp->rc_ss_remove = 1;
1709 return (TRUE);
1710 }
1711 /*
1712 * If the sysadmin has used clear_locks for this
1713 * entry then forced_expire will be set and we
1714 * want this entry to be reaped. Or the entry
1715 * has exceeded its lease period.
1716 */
1717 cp_expired = (cp->rc_forced_expire ||
1718 (gethrestime_sec() - cp->rc_last_access
1719 > rfs4_lease_time));
1720
1721 if (!cp->rc_ss_remove && cp_expired)
1722 cp->rc_ss_remove = 1;
1723 return (cp_expired);
1724 }
1725
1726 /*
1727 * Remove the leaf file from all distributed stable storage paths.
1728 */
1729 static void
rfs4_dss_remove_cpleaf(rfs4_client_t * cp)1730 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1731 {
1732 nfs4_srv_t *nsrv4;
1733 rfs4_servinst_t *sip;
1734 char *leaf = cp->rc_ss_pn->leaf;
1735
1736 /*
1737 * since the state files are written to all DSS
1738 * paths we must remove this leaf file instance
1739 * from all server instances.
1740 */
1741
1742 nsrv4 = nfs4_get_srv();
1743 mutex_enter(&nsrv4->servinst_lock);
1744 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1745 /* remove the leaf file associated with this server instance */
1746 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1747 }
1748 mutex_exit(&nsrv4->servinst_lock);
1749 }
1750
1751 static void
rfs4_dss_remove_leaf(rfs4_servinst_t * sip,char * dir_leaf,char * leaf)1752 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1753 {
1754 int i, npaths = sip->dss_npaths;
1755
1756 for (i = 0; i < npaths; i++) {
1757 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1758 char *path, *dir;
1759 size_t pathlen;
1760
1761 /* the HA-NFSv4 path might have been failed-over away from us */
1762 if (dss_path == NULL)
1763 continue;
1764
1765 dir = dss_path->path;
1766
1767 /* allow 3 extra bytes for two '/' & a NUL */
1768 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1769 path = kmem_alloc(pathlen, KM_SLEEP);
1770 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1771
1772 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1773
1774 kmem_free(path, pathlen);
1775 }
1776 }
1777
1778 static void
rfs4_client_destroy(rfs4_entry_t u_entry)1779 rfs4_client_destroy(rfs4_entry_t u_entry)
1780 {
1781 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1782
1783 mutex_destroy(cp->rc_cbinfo.cb_lock);
1784 cv_destroy(cp->rc_cbinfo.cb_cv);
1785 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1786 list_destroy(&cp->rc_openownerlist);
1787
1788 list_destroy(&cp->rc_sessions);
1789
1790 /* free callback info */
1791 rfs4_cbinfo_free(&cp->rc_cbinfo);
1792
1793 if (cp->rc_cp_confirmed)
1794 rfs4_client_rele(cp->rc_cp_confirmed);
1795
1796 if (cp->rc_ss_pn) {
1797 /* check if the stable storage files need to be removed */
1798 if (cp->rc_ss_remove)
1799 rfs4_dss_remove_cpleaf(cp);
1800 rfs4_ss_pnfree(cp->rc_ss_pn);
1801 }
1802
1803 /* Free the client supplied client id */
1804 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1805
1806 if (cp->rc_sysidt != LM_NOSYSID)
1807 lm_free_sysidt(cp->rc_sysidt);
1808
1809 rfs4_free_cred_set(&cp->rc_cr_set);
1810 }
1811
1812 static bool_t
rfs4_client_create(rfs4_entry_t u_entry,void * arg)1813 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1814 {
1815 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1816 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1817 struct sockaddr *ca;
1818 cid *cidp;
1819 scid_confirm_verf *scvp;
1820 nfs4_srv_t *nsrv4;
1821
1822 nsrv4 = nfs4_get_srv();
1823
1824 /* Get a clientid to give to the client */
1825 cidp = (cid *)&cp->rc_clientid;
1826 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1827 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1828
1829 /* If we are booted as a cluster node, embed our nodeid */
1830 if (cluster_bootflags & CLUSTER_BOOTED)
1831 embed_nodeid(cidp);
1832
1833 /* Allocate and copy client's client id value */
1834 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1835 cp->rc_nfs_client.id_len = client->id_len;
1836 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1837 cp->rc_nfs_client.verifier = client->verifier;
1838
1839 /* Copy client's IP address */
1840 ca = client->cl_addr;
1841 if (ca->sa_family == AF_INET)
1842 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1843 else if (ca->sa_family == AF_INET6)
1844 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1845 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1846
1847 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1848 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1849 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1850 scvp->cv_impl.gen_num = 0;
1851
1852 /* An F_UNLKSYS has been done for this client */
1853 cp->rc_unlksys_completed = FALSE;
1854
1855 /* We need the client to ack us */
1856 cp->rc_need_confirm = TRUE;
1857 cp->rc_cp_confirmed = NULL;
1858 cp->rc_destroying = FALSE;
1859
1860 /* TRUE all the time until the callback path actually fails */
1861 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1862
1863 /* Initialize the access time to now */
1864 cp->rc_last_access = gethrestime_sec();
1865
1866 bzero(&cp->rc_cr_set, sizeof (cred_set_t));
1867
1868 cp->rc_sysidt = LM_NOSYSID;
1869
1870 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1871 offsetof(rfs4_openowner_t, ro_node));
1872
1873 list_create(&cp->rc_sessions, sizeof (rfs4_session_t),
1874 offsetof(rfs4_session_t, sn_node));
1875
1876 /* set up the callback control structure */
1877 cp->rc_cbinfo.cb_state = CB_UNINIT;
1878 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1879 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1880 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1881
1882 /*
1883 * Associate the client_t with the current server instance.
1884 * The hold is solely to satisfy the calling requirement of
1885 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1886 */
1887 rfs4_dbe_hold(cp->rc_dbe);
1888 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1889 rfs4_dbe_rele(cp->rc_dbe);
1890
1891 /*
1892 * NFSv4.1: See rfc8881, Section 18.36.4, eir_sequenceid
1893 * "Before the server replies to that EXCHANGE_ID
1894 * operation, it initializes the client ID slot to be equal to
1895 * eir_sequenceid - 1 (accounting for underflow), and records a
1896 * contrived CREATE_SESSION result with a "cached" result of
1897 * NFS4ERR_SEQ_MISORDERED."
1898 */
1899 cp->rc_contrived.xi_sid = 1;
1900 cp->rc_contrived.cs_status = NFS4ERR_SEQ_MISORDERED;
1901
1902 return (TRUE);
1903 }
1904
1905 /*
1906 * Caller wants to generate/update the setclientid_confirm verifier
1907 * associated with a client. This is done during the SETCLIENTID
1908 * processing.
1909 */
1910 void
rfs4_client_scv_next(rfs4_client_t * cp)1911 rfs4_client_scv_next(rfs4_client_t *cp)
1912 {
1913 scid_confirm_verf *scvp;
1914
1915 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1916 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1917 scvp->cv_impl.gen_num++;
1918 }
1919
1920 void
rfs4_client_rele(rfs4_client_t * cp)1921 rfs4_client_rele(rfs4_client_t *cp)
1922 {
1923 rfs4_dbe_rele(cp->rc_dbe);
1924 }
1925
1926 rfs4_client_t *
rfs4_findclient(nfs_client_id4 * client,bool_t * create,rfs4_client_t * oldcp)1927 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1928 {
1929 rfs4_client_t *cp;
1930 nfs4_srv_t *nsrv4;
1931 nsrv4 = nfs4_get_srv();
1932
1933
1934 if (oldcp) {
1935 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1936 rfs4_dbe_hide(oldcp->rc_dbe);
1937 } else {
1938 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1939 }
1940
1941 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1942 create, (void *)client, RFS4_DBS_VALID);
1943
1944 if (oldcp)
1945 rfs4_dbe_unhide(oldcp->rc_dbe);
1946
1947 rw_exit(&nsrv4->rfs4_findclient_lock);
1948
1949 return (cp);
1950 }
1951
1952 rfs4_client_t *
rfs4_findclient_by_id(clientid4 clientid,bool_t find_unconfirmed)1953 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1954 {
1955 rfs4_client_t *cp;
1956 bool_t create = FALSE;
1957 cid *cidp = (cid *)&clientid;
1958 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1959
1960 /* If we're a cluster and the nodeid isn't right, short-circuit */
1961 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1962 return (NULL);
1963
1964 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1965
1966 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1967 &create, NULL, RFS4_DBS_VALID);
1968
1969 rw_exit(&nsrv4->rfs4_findclient_lock);
1970
1971 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1972 rfs4_client_rele(cp);
1973 return (NULL);
1974 } else {
1975 return (cp);
1976 }
1977 }
1978
/*
 * Hash callback for the "client_ip" index.  The key is a struct sockaddr;
 * the hash is a shift-and-add over the raw bytes of the IPv4 or IPv6
 * address.  Any other address family hashes to 0.
 */
static uint32_t
clntip_hash(void *key)
{
	struct sockaddr *sa = key;
	char *bytes;
	int remaining;
	uint32_t hash = 0;

	switch (sa->sa_family) {
	case AF_INET:
		bytes = (char *)&((struct sockaddr_in *)sa)->sin_addr;
		remaining = sizeof (struct in_addr);
		break;
	case AF_INET6:
		bytes = (char *)&((struct sockaddr_in6 *)sa)->sin6_addr;
		remaining = sizeof (struct in6_addr);
		break;
	default:
		return (0);
	}

	for (; remaining > 0; remaining--, bytes++)
		hash = (hash << 1) + (uint32_t)*bytes;

	return (hash);
}
2004
2005 static bool_t
clntip_compare(rfs4_entry_t entry,void * key)2006 clntip_compare(rfs4_entry_t entry, void *key)
2007 {
2008 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2009 struct sockaddr *addr = key;
2010 int len = 0;
2011 char *p1, *p2;
2012
2013 if (addr->sa_family == AF_INET) {
2014 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
2015 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
2016 len = sizeof (struct in_addr);
2017 p1 = (char *)&a1->sin_addr;
2018 p2 = (char *)&a2->sin_addr;
2019 } else if (addr->sa_family == AF_INET6) {
2020 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
2021 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
2022 len = sizeof (struct in6_addr);
2023 p1 = (char *)&a1->sin6_addr;
2024 p2 = (char *)&a2->sin6_addr;
2025 } else
2026 return (0);
2027
2028 return (bcmp(p1, p2, len) == 0);
2029 }
2030
2031 static void *
clntip_mkkey(rfs4_entry_t entry)2032 clntip_mkkey(rfs4_entry_t entry)
2033 {
2034 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2035
2036 return (&cp->ri_addr);
2037 }
2038
2039 static bool_t
rfs4_clntip_expiry(rfs4_entry_t u_entry)2040 rfs4_clntip_expiry(rfs4_entry_t u_entry)
2041 {
2042 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2043
2044 if (rfs4_dbe_is_invalid(cp->ri_dbe))
2045 return (TRUE);
2046 return (FALSE);
2047 }
2048
/*
 * Destroy callback for the client IP table.  Intentionally empty:
 * rfs4_clntip_create() only copies an address into the entry and sets a
 * flag, so there is nothing to release here.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
2054
2055 static bool_t
rfs4_clntip_create(rfs4_entry_t u_entry,void * arg)2056 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
2057 {
2058 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2059 struct sockaddr *ca = (struct sockaddr *)arg;
2060
2061 /* Copy client's IP address */
2062 if (ca->sa_family == AF_INET)
2063 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
2064 else if (ca->sa_family == AF_INET6)
2065 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
2066 else
2067 return (FALSE);
2068 cp->ri_no_referrals = 1;
2069
2070 return (TRUE);
2071 }
2072
2073 rfs4_clntip_t *
rfs4_find_clntip(struct sockaddr * addr,bool_t * create)2074 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
2075 {
2076 rfs4_clntip_t *cp;
2077 nfs4_srv_t *nsrv4;
2078
2079 nsrv4 = nfs4_get_srv();
2080
2081 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2082
2083 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2084 create, addr, RFS4_DBS_VALID);
2085
2086 rw_exit(&nsrv4->rfs4_findclient_lock);
2087
2088 return (cp);
2089 }
2090
2091 void
rfs4_invalidate_clntip(struct sockaddr * addr)2092 rfs4_invalidate_clntip(struct sockaddr *addr)
2093 {
2094 rfs4_clntip_t *cp;
2095 bool_t create = FALSE;
2096 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2097
2098 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2099
2100 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2101 &create, NULL, RFS4_DBS_VALID);
2102 if (cp == NULL) {
2103 rw_exit(&nsrv4->rfs4_findclient_lock);
2104 return;
2105 }
2106 rfs4_dbe_invalidate(cp->ri_dbe);
2107 rfs4_dbe_rele(cp->ri_dbe);
2108
2109 rw_exit(&nsrv4->rfs4_findclient_lock);
2110 }
2111
2112 bool_t
rfs4_lease_expired(rfs4_client_t * cp)2113 rfs4_lease_expired(rfs4_client_t *cp)
2114 {
2115 bool_t rc;
2116
2117 rfs4_dbe_lock(cp->rc_dbe);
2118
2119 /*
2120 * If the admin has executed clear_locks for this
2121 * client id, force expire will be set, so no need
2122 * to calculate anything because it's "outa here".
2123 */
2124 if (cp->rc_forced_expire) {
2125 rc = TRUE;
2126 } else {
2127 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2128 }
2129
2130 /*
2131 * If the lease has expired we will also want
2132 * to remove any stable storage state data. So
2133 * mark the client id accordingly.
2134 */
2135 if (!cp->rc_ss_remove)
2136 cp->rc_ss_remove = (rc == TRUE);
2137
2138 rfs4_dbe_unlock(cp->rc_dbe);
2139
2140 return (rc);
2141 }
2142
2143 void
rfs4_update_lease(rfs4_client_t * cp)2144 rfs4_update_lease(rfs4_client_t *cp)
2145 {
2146 rfs4_dbe_lock(cp->rc_dbe);
2147 if (!cp->rc_forced_expire)
2148 cp->rc_last_access = gethrestime_sec();
2149 rfs4_dbe_unlock(cp->rc_dbe);
2150 }
2151
2152
2153 static bool_t
EQOPENOWNER(open_owner4 * a,open_owner4 * b)2154 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2155 {
2156 bool_t rc;
2157
2158 if (a->clientid != b->clientid)
2159 return (FALSE);
2160
2161 if (a->owner_len != b->owner_len)
2162 return (FALSE);
2163
2164 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2165
2166 return (rc);
2167 }
2168
2169 static uint_t
openowner_hash(void * key)2170 openowner_hash(void *key)
2171 {
2172 int i;
2173 open_owner4 *openowner = key;
2174 uint_t hash = 0;
2175
2176 for (i = 0; i < openowner->owner_len; i++) {
2177 hash <<= 4;
2178 hash += (uint_t)openowner->owner_val[i];
2179 }
2180 hash += (uint_t)openowner->clientid;
2181 hash |= (openowner->clientid >> 32);
2182
2183 return (hash);
2184 }
2185
2186 static bool_t
openowner_compare(rfs4_entry_t u_entry,void * key)2187 openowner_compare(rfs4_entry_t u_entry, void *key)
2188 {
2189 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2190 open_owner4 *arg = key;
2191
2192 return (EQOPENOWNER(&oo->ro_owner, arg));
2193 }
2194
2195 void *
openowner_mkkey(rfs4_entry_t u_entry)2196 openowner_mkkey(rfs4_entry_t u_entry)
2197 {
2198 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2199
2200 return (&oo->ro_owner);
2201 }
2202
2203 /* ARGSUSED */
2204 static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)2205 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2206 {
2207 /* openstateid held us and did all needed delay */
2208 return (TRUE);
2209 }
2210
2211 static void
rfs4_openowner_destroy(rfs4_entry_t u_entry)2212 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2213 {
2214 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2215
2216 /* Remove open owner from client's lists of open owners */
2217 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2218 list_remove(&oo->ro_client->rc_openownerlist, oo);
2219 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2220
2221 /* One less reference to the client */
2222 rfs4_client_rele(oo->ro_client);
2223 oo->ro_client = NULL;
2224
2225 /* Free the last reply for this lock owner */
2226 rfs4_free_reply(&oo->ro_reply);
2227
2228 if (oo->ro_reply_fh.nfs_fh4_val) {
2229 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2230 oo->ro_reply_fh.nfs_fh4_len);
2231 oo->ro_reply_fh.nfs_fh4_val = NULL;
2232 oo->ro_reply_fh.nfs_fh4_len = 0;
2233 }
2234
2235 rfs4_sw_destroy(&oo->ro_sw);
2236 list_destroy(&oo->ro_statelist);
2237
2238 /* Free the lock owner id */
2239 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2240 }
2241
2242 void
rfs4_openowner_rele(rfs4_openowner_t * oo)2243 rfs4_openowner_rele(rfs4_openowner_t *oo)
2244 {
2245 rfs4_dbe_rele(oo->ro_dbe);
2246 }
2247
2248 static bool_t
rfs4_openowner_create(rfs4_entry_t u_entry,void * arg)2249 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2250 {
2251 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2252 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2253 open_owner4 *openowner = &argp->ro_owner;
2254 seqid4 seqid = argp->ro_open_seqid;
2255 rfs4_client_t *cp;
2256 bool_t create = FALSE;
2257 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2258
2259 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2260
2261 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2262 &openowner->clientid,
2263 &create, NULL, RFS4_DBS_VALID);
2264
2265 rw_exit(&nsrv4->rfs4_findclient_lock);
2266
2267 if (cp == NULL)
2268 return (FALSE);
2269
2270 oo->ro_reply_fh.nfs_fh4_len = 0;
2271 oo->ro_reply_fh.nfs_fh4_val = NULL;
2272
2273 oo->ro_owner.clientid = openowner->clientid;
2274 oo->ro_owner.owner_val =
2275 kmem_alloc(openowner->owner_len, KM_SLEEP);
2276
2277 bcopy(openowner->owner_val,
2278 oo->ro_owner.owner_val, openowner->owner_len);
2279
2280 oo->ro_owner.owner_len = openowner->owner_len;
2281
2282 oo->ro_need_confirm = TRUE;
2283
2284 rfs4_sw_init(&oo->ro_sw);
2285
2286 oo->ro_open_seqid = seqid;
2287 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2288 oo->ro_client = cp;
2289
2290 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2291 offsetof(rfs4_state_t, rs_node));
2292
2293 /* Insert openowner into client's open owner list */
2294 rfs4_dbe_lock(cp->rc_dbe);
2295 list_insert_tail(&cp->rc_openownerlist, oo);
2296 rfs4_dbe_unlock(cp->rc_dbe);
2297
2298 return (TRUE);
2299 }
2300
2301 rfs4_openowner_t *
rfs4_findopenowner(open_owner4 * openowner,bool_t * create,seqid4 seqid)2302 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2303 {
2304 rfs4_openowner_t *oo;
2305 rfs4_openowner_t arg;
2306 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2307
2308 arg.ro_owner = *openowner;
2309 arg.ro_open_seqid = seqid;
2310 /* CSTYLED */
2311 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2312 create, &arg, RFS4_DBS_VALID);
2313
2314 return (oo);
2315 }
2316
2317 void
rfs4_update_open_sequence(rfs4_openowner_t * oo)2318 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2319 {
2320
2321 rfs4_dbe_lock(oo->ro_dbe);
2322
2323 oo->ro_open_seqid++;
2324
2325 rfs4_dbe_unlock(oo->ro_dbe);
2326 }
2327
2328 void
rfs4_update_open_resp(rfs4_openowner_t * oo,nfs_resop4 * resp,nfs_fh4 * fh)2329 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2330 {
2331
2332 rfs4_dbe_lock(oo->ro_dbe);
2333
2334 rfs4_free_reply(&oo->ro_reply);
2335
2336 rfs4_copy_reply(&oo->ro_reply, resp);
2337
2338 /* Save the filehandle if provided and free if not used */
2339 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2340 fh && fh->nfs_fh4_len) {
2341 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2342 oo->ro_reply_fh.nfs_fh4_val =
2343 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2344 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2345 } else {
2346 if (oo->ro_reply_fh.nfs_fh4_val) {
2347 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2348 oo->ro_reply_fh.nfs_fh4_len);
2349 oo->ro_reply_fh.nfs_fh4_val = NULL;
2350 oo->ro_reply_fh.nfs_fh4_len = 0;
2351 }
2352 }
2353
2354 rfs4_dbe_unlock(oo->ro_dbe);
2355 }
2356
2357 static bool_t
lockowner_compare(rfs4_entry_t u_entry,void * key)2358 lockowner_compare(rfs4_entry_t u_entry, void *key)
2359 {
2360 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2361 lock_owner4 *b = (lock_owner4 *)key;
2362
2363 if (lo->rl_owner.clientid != b->clientid)
2364 return (FALSE);
2365
2366 if (lo->rl_owner.owner_len != b->owner_len)
2367 return (FALSE);
2368
2369 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2370 lo->rl_owner.owner_len) == 0);
2371 }
2372
2373 void *
lockowner_mkkey(rfs4_entry_t u_entry)2374 lockowner_mkkey(rfs4_entry_t u_entry)
2375 {
2376 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2377
2378 return (&lo->rl_owner);
2379 }
2380
2381 static uint32_t
lockowner_hash(void * key)2382 lockowner_hash(void *key)
2383 {
2384 int i;
2385 lock_owner4 *lockowner = key;
2386 uint_t hash = 0;
2387
2388 for (i = 0; i < lockowner->owner_len; i++) {
2389 hash <<= 4;
2390 hash += (uint_t)lockowner->owner_val[i];
2391 }
2392 hash += (uint_t)lockowner->clientid;
2393 hash |= (lockowner->clientid >> 32);
2394
2395 return (hash);
2396 }
2397
/*
 * Hash callback for the pid index.  The key is not a real pointer: it
 * is an integer carried in a void *, and its value truncated to 32
 * bits is the hash.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2403
2404 static void *
pid_mkkey(rfs4_entry_t u_entry)2405 pid_mkkey(rfs4_entry_t u_entry)
2406 {
2407 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2408
2409 return ((void *)(uintptr_t)lo->rl_pid);
2410 }
2411
2412 static bool_t
pid_compare(rfs4_entry_t u_entry,void * key)2413 pid_compare(rfs4_entry_t u_entry, void *key)
2414 {
2415 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2416
2417 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2418 }
2419
2420 static void
rfs4_lockowner_destroy(rfs4_entry_t u_entry)2421 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2422 {
2423 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2424
2425 /* Free the lock owner id */
2426 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2427 rfs4_client_rele(lo->rl_client);
2428 }
2429
2430 void
rfs4_lockowner_rele(rfs4_lockowner_t * lo)2431 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2432 {
2433 rfs4_dbe_rele(lo->rl_dbe);
2434 }
2435
2436 /* ARGSUSED */
2437 static bool_t
rfs4_lockowner_expiry(rfs4_entry_t u_entry)2438 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2439 {
2440 /*
2441 * Since expiry is called with no other references on
2442 * this struct, go ahead and have it removed.
2443 */
2444 return (TRUE);
2445 }
2446
2447 static bool_t
rfs4_lockowner_create(rfs4_entry_t u_entry,void * arg)2448 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2449 {
2450 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2451 lock_owner4 *lockowner = (lock_owner4 *)arg;
2452 rfs4_client_t *cp;
2453 bool_t create = FALSE;
2454 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2455
2456 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2457
2458 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2459 &lockowner->clientid,
2460 &create, NULL, RFS4_DBS_VALID);
2461
2462 rw_exit(&nsrv4->rfs4_findclient_lock);
2463
2464 if (cp == NULL)
2465 return (FALSE);
2466
2467 /* Reference client */
2468 lo->rl_client = cp;
2469 lo->rl_owner.clientid = lockowner->clientid;
2470 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2471 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2472 lockowner->owner_len);
2473 lo->rl_owner.owner_len = lockowner->owner_len;
2474 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2475
2476 return (TRUE);
2477 }
2478
2479 rfs4_lockowner_t *
rfs4_findlockowner(lock_owner4 * lockowner,bool_t * create)2480 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2481 {
2482 rfs4_lockowner_t *lo;
2483 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2484
2485 /* CSTYLED */
2486 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2487 create, lockowner, RFS4_DBS_VALID);
2488
2489 return (lo);
2490 }
2491
2492 rfs4_lockowner_t *
rfs4_findlockowner_by_pid(pid_t pid)2493 rfs4_findlockowner_by_pid(pid_t pid)
2494 {
2495 rfs4_lockowner_t *lo;
2496 bool_t create = FALSE;
2497 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2498
2499 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2500 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2501
2502 return (lo);
2503 }
2504
2505
2506 static uint32_t
file_hash(void * key)2507 file_hash(void *key)
2508 {
2509 return (ADDRHASH(key));
2510 }
2511
2512 static void *
file_mkkey(rfs4_entry_t u_entry)2513 file_mkkey(rfs4_entry_t u_entry)
2514 {
2515 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2516
2517 return (fp->rf_vp);
2518 }
2519
2520 static bool_t
file_compare(rfs4_entry_t u_entry,void * key)2521 file_compare(rfs4_entry_t u_entry, void *key)
2522 {
2523 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2524
2525 return (fp->rf_vp == (vnode_t *)key);
2526 }
2527
2528 static void
rfs4_file_destroy(rfs4_entry_t u_entry)2529 rfs4_file_destroy(rfs4_entry_t u_entry)
2530 {
2531 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2532
2533 list_destroy(&fp->rf_delegstatelist);
2534
2535 if (fp->rf_filehandle.nfs_fh4_val)
2536 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2537 fp->rf_filehandle.nfs_fh4_len);
2538 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2539 if (fp->rf_vp) {
2540 vnode_t *vp = fp->rf_vp;
2541
2542 mutex_enter(&vp->v_vsd_lock);
2543 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2544 mutex_exit(&vp->v_vsd_lock);
2545 VN_RELE(vp);
2546 fp->rf_vp = NULL;
2547 }
2548 rw_destroy(&fp->rf_file_rwlock);
2549 }
2550
2551 /*
2552 * Used to unlock the underlying dbe struct only
2553 */
2554 void
rfs4_file_rele(rfs4_file_t * fp)2555 rfs4_file_rele(rfs4_file_t *fp)
2556 {
2557 rfs4_dbe_rele(fp->rf_dbe);
2558 }
2559
2560 typedef struct {
2561 vnode_t *vp;
2562 nfs_fh4 *fh;
2563 } rfs4_fcreate_arg;
2564
2565 static bool_t
rfs4_file_create(rfs4_entry_t u_entry,void * arg)2566 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2567 {
2568 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2569 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2570 vnode_t *vp = ap->vp;
2571 nfs_fh4 *fh = ap->fh;
2572
2573 VN_HOLD(vp);
2574
2575 fp->rf_filehandle.nfs_fh4_len = 0;
2576 fp->rf_filehandle.nfs_fh4_val = NULL;
2577 ASSERT(fh && fh->nfs_fh4_len);
2578 if (fh && fh->nfs_fh4_len) {
2579 fp->rf_filehandle.nfs_fh4_val =
2580 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2581 nfs_fh4_copy(fh, &fp->rf_filehandle);
2582 }
2583 fp->rf_vp = vp;
2584
2585 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2586 offsetof(rfs4_deleg_state_t, rds_node));
2587
2588 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2589 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2590
2591 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2592 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2593
2594 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2595
2596 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2597
2598 mutex_enter(&vp->v_vsd_lock);
2599 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2600 mutex_exit(&vp->v_vsd_lock);
2601
2602 return (TRUE);
2603 }
2604
2605 rfs4_file_t *
rfs4_findfile(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2606 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2607 {
2608 rfs4_file_t *fp;
2609 rfs4_fcreate_arg arg;
2610 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2611
2612 arg.vp = vp;
2613 arg.fh = fh;
2614
2615 if (*create == TRUE)
2616 /* CSTYLED */
2617 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2618 &arg, RFS4_DBS_VALID);
2619 else {
2620 mutex_enter(&vp->v_vsd_lock);
2621 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2622 if (fp) {
2623 rfs4_dbe_lock(fp->rf_dbe);
2624 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2625 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2626 rfs4_dbe_unlock(fp->rf_dbe);
2627 fp = NULL;
2628 } else {
2629 rfs4_dbe_hold(fp->rf_dbe);
2630 rfs4_dbe_unlock(fp->rf_dbe);
2631 }
2632 }
2633 mutex_exit(&vp->v_vsd_lock);
2634 }
2635 return (fp);
2636 }
2637
2638 /*
2639 * Find a file in the db and once it is located, take the rw lock.
2640 * Need to check the vnode pointer and if it does not exist (it was
2641 * removed between the db location and check) redo the find. This
2642 * assumes that a file struct that has a NULL vnode pointer is marked
2643 * at 'invalid' and will not be found in the db the second time
2644 * around.
2645 */
2646 rfs4_file_t *
rfs4_findfile_withlock(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2647 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2648 {
2649 rfs4_file_t *fp;
2650 rfs4_fcreate_arg arg;
2651 bool_t screate = *create;
2652 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2653
2654 if (screate == FALSE) {
2655 mutex_enter(&vp->v_vsd_lock);
2656 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2657 if (fp) {
2658 rfs4_dbe_lock(fp->rf_dbe);
2659 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2660 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2661 rfs4_dbe_unlock(fp->rf_dbe);
2662 mutex_exit(&vp->v_vsd_lock);
2663 fp = NULL;
2664 } else {
2665 rfs4_dbe_hold(fp->rf_dbe);
2666 rfs4_dbe_unlock(fp->rf_dbe);
2667 mutex_exit(&vp->v_vsd_lock);
2668 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2669 if (fp->rf_vp == NULL) {
2670 rw_exit(&fp->rf_file_rwlock);
2671 rfs4_file_rele(fp);
2672 fp = NULL;
2673 }
2674 }
2675 } else {
2676 mutex_exit(&vp->v_vsd_lock);
2677 }
2678 } else {
2679 retry:
2680 arg.vp = vp;
2681 arg.fh = fh;
2682
2683 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2684 create, &arg, RFS4_DBS_VALID);
2685 if (fp != NULL) {
2686 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2687 if (fp->rf_vp == NULL) {
2688 rw_exit(&fp->rf_file_rwlock);
2689 rfs4_file_rele(fp);
2690 *create = screate;
2691 goto retry;
2692 }
2693 }
2694 }
2695
2696 return (fp);
2697 }
2698
2699 static uint32_t
lo_state_hash(void * key)2700 lo_state_hash(void *key)
2701 {
2702 stateid_t *id = key;
2703
2704 return (id->bits.ident+id->bits.pid);
2705 }
2706
2707 static bool_t
lo_state_compare(rfs4_entry_t u_entry,void * key)2708 lo_state_compare(rfs4_entry_t u_entry, void *key)
2709 {
2710 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2711 stateid_t *id = key;
2712 bool_t rc;
2713
2714 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2715 lsp->rls_lockid.bits.type == id->bits.type &&
2716 lsp->rls_lockid.bits.ident == id->bits.ident &&
2717 lsp->rls_lockid.bits.pid == id->bits.pid);
2718
2719 return (rc);
2720 }
2721
2722 static void *
lo_state_mkkey(rfs4_entry_t u_entry)2723 lo_state_mkkey(rfs4_entry_t u_entry)
2724 {
2725 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2726
2727 return (&lsp->rls_lockid);
2728 }
2729
2730 static bool_t
rfs4_lo_state_expiry(rfs4_entry_t u_entry)2731 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2732 {
2733 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2734
2735 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2736 return (TRUE);
2737 if (lsp->rls_state->rs_closed)
2738 return (TRUE);
2739 return ((gethrestime_sec() -
2740 lsp->rls_state->rs_owner->ro_client->rc_last_access
2741 > rfs4_lease_time));
2742 }
2743
2744 static void
rfs4_lo_state_destroy(rfs4_entry_t u_entry)2745 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2746 {
2747 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2748
2749 rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2750 list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2751 rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2752
2753 rfs4_sw_destroy(&lsp->rls_sw);
2754
2755 /* Make sure to release the file locks */
2756 if (lsp->rls_locks_cleaned == FALSE) {
2757 lsp->rls_locks_cleaned = TRUE;
2758 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2759 /* Is the PxFS kernel module loaded? */
2760 if (lm_remove_file_locks != NULL) {
2761 int new_sysid;
2762
2763 /* Encode the cluster nodeid in new sysid */
2764 new_sysid =
2765 lsp->rls_locker->rl_client->rc_sysidt;
2766 lm_set_nlmid_flk(&new_sysid);
2767
2768 /*
2769 * This PxFS routine removes file locks for a
2770 * client over all nodes of a cluster.
2771 */
2772 DTRACE_PROBE1(nfss_i_clust_rm_lck,
2773 int, new_sysid);
2774 (*lm_remove_file_locks)(new_sysid);
2775 } else {
2776 (void) cleanlocks(
2777 lsp->rls_state->rs_finfo->rf_vp,
2778 lsp->rls_locker->rl_pid,
2779 lsp->rls_locker->rl_client->rc_sysidt);
2780 }
2781 }
2782 }
2783
2784 /* Free the last reply for this state */
2785 rfs4_free_reply(&lsp->rls_reply);
2786
2787 rfs4_lockowner_rele(lsp->rls_locker);
2788 lsp->rls_locker = NULL;
2789
2790 rfs4_state_rele_nounlock(lsp->rls_state);
2791 lsp->rls_state = NULL;
2792 }
2793
2794 static bool_t
rfs4_lo_state_create(rfs4_entry_t u_entry,void * arg)2795 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2796 {
2797 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2798 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2799 rfs4_lockowner_t *lo = argp->rls_locker;
2800 rfs4_state_t *sp = argp->rls_state;
2801
2802 lsp->rls_state = sp;
2803
2804 lsp->rls_lockid = sp->rs_stateid;
2805 lsp->rls_lockid.bits.type = LOCKID;
2806 lsp->rls_lockid.bits.chgseq = 0;
2807 lsp->rls_lockid.bits.pid = lo->rl_pid;
2808
2809 lsp->rls_locks_cleaned = FALSE;
2810 lsp->rls_lock_completed = FALSE;
2811
2812 rfs4_sw_init(&lsp->rls_sw);
2813
2814 /* Attached the supplied lock owner */
2815 rfs4_dbe_hold(lo->rl_dbe);
2816 lsp->rls_locker = lo;
2817
2818 rfs4_dbe_lock(sp->rs_dbe);
2819 list_insert_tail(&sp->rs_lostatelist, lsp);
2820 rfs4_dbe_hold(sp->rs_dbe);
2821 rfs4_dbe_unlock(sp->rs_dbe);
2822
2823 return (TRUE);
2824 }
2825
2826 void
rfs4_lo_state_rele(rfs4_lo_state_t * lsp,bool_t unlock_fp)2827 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2828 {
2829 if (unlock_fp == TRUE)
2830 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2831 rfs4_dbe_rele(lsp->rls_dbe);
2832 }
2833
2834 static rfs4_lo_state_t *
rfs4_findlo_state(stateid_t * id,bool_t lock_fp)2835 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2836 {
2837 rfs4_lo_state_t *lsp;
2838 bool_t create = FALSE;
2839 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2840
2841 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2842 &create, NULL, RFS4_DBS_VALID);
2843 if (lock_fp == TRUE && lsp != NULL)
2844 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2845
2846 return (lsp);
2847 }
2848
2849
2850 static uint32_t
lo_state_lo_hash(void * key)2851 lo_state_lo_hash(void *key)
2852 {
2853 rfs4_lo_state_t *lsp = key;
2854
2855 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2856 }
2857
2858 static bool_t
lo_state_lo_compare(rfs4_entry_t u_entry,void * key)2859 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2860 {
2861 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2862 rfs4_lo_state_t *keyp = key;
2863
2864 return (keyp->rls_locker == lsp->rls_locker &&
2865 keyp->rls_state == lsp->rls_state);
2866 }
2867
2868 static void *
lo_state_lo_mkkey(rfs4_entry_t u_entry)2869 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2870 {
2871 return (u_entry);
2872 }
2873
2874 rfs4_lo_state_t *
rfs4_findlo_state_by_owner(rfs4_lockowner_t * lo,rfs4_state_t * sp,bool_t * create)2875 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2876 bool_t *create)
2877 {
2878 rfs4_lo_state_t *lsp;
2879 rfs4_lo_state_t arg;
2880 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2881
2882 arg.rls_locker = lo;
2883 arg.rls_state = sp;
2884
2885 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2886 &arg, create, &arg, RFS4_DBS_VALID);
2887
2888 return (lsp);
2889 }
2890
2891 static stateid_t
get_stateid(id_t eid)2892 get_stateid(id_t eid)
2893 {
2894 stateid_t id;
2895 nfs4_srv_t *nsrv4;
2896
2897 nsrv4 = nfs4_get_srv();
2898
2899 id.bits.boottime = nsrv4->rfs4_start_time;
2900 id.bits.ident = eid;
2901 id.bits.chgseq = 0;
2902 id.bits.type = 0;
2903 id.bits.pid = 0;
2904
2905 /*
2906 * If we are booted as a cluster node, embed our nodeid.
2907 * We've already done sanity checks in rfs4_client_create() so no
2908 * need to repeat them here.
2909 */
2910 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2911 clconf_get_nodeid() : 0;
2912
2913 return (id);
2914 }
2915
2916 /*
2917 * For use only when booted as a cluster node.
2918 * Returns TRUE if the embedded nodeid indicates that this stateid was
2919 * generated on another node.
2920 */
2921 static int
foreign_stateid(stateid_t * id)2922 foreign_stateid(stateid_t *id)
2923 {
2924 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2925 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2926 }
2927
2928 /*
2929 * For use only when booted as a cluster node.
2930 * Returns TRUE if the embedded nodeid indicates that this clientid was
2931 * generated on another node.
2932 */
2933 static int
foreign_clientid(cid * cidp)2934 foreign_clientid(cid *cidp)
2935 {
2936 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2937 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2938 (uint32_t)clconf_get_nodeid());
2939 }
2940
2941 /*
2942 * For use only when booted as a cluster node.
2943 * Embed our cluster nodeid into the clientid.
2944 */
2945 static void
embed_nodeid(cid * cidp)2946 embed_nodeid(cid *cidp)
2947 {
2948 int clnodeid;
2949 /*
2950 * Currently, our state tables are small enough that their
2951 * ids will leave enough bits free for the nodeid. If the
2952 * tables become larger, we mustn't overwrite the id.
2953 * Equally, we only have room for so many bits of nodeid, so
2954 * must check that too.
2955 */
2956 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2957 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2958 clnodeid = clconf_get_nodeid();
2959 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2960 ASSERT(clnodeid != NODEID_UNKNOWN);
2961 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2962 }
2963
2964 static uint32_t
state_hash(void * key)2965 state_hash(void *key)
2966 {
2967 stateid_t *ip = (stateid_t *)key;
2968
2969 return (ip->bits.ident);
2970 }
2971
2972 static bool_t
state_compare(rfs4_entry_t u_entry,void * key)2973 state_compare(rfs4_entry_t u_entry, void *key)
2974 {
2975 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2976 stateid_t *id = (stateid_t *)key;
2977 bool_t rc;
2978
2979 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2980 sp->rs_stateid.bits.ident == id->bits.ident);
2981
2982 return (rc);
2983 }
2984
2985 static void *
state_mkkey(rfs4_entry_t u_entry)2986 state_mkkey(rfs4_entry_t u_entry)
2987 {
2988 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2989
2990 return (&sp->rs_stateid);
2991 }
2992
2993 static void
rfs4_state_destroy(rfs4_entry_t u_entry)2994 rfs4_state_destroy(rfs4_entry_t u_entry)
2995 {
2996 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2997
2998 /* remove from openowner list */
2999 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
3000 list_remove(&sp->rs_owner->ro_statelist, sp);
3001 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
3002
3003 list_destroy(&sp->rs_lostatelist);
3004
3005 /* release any share locks for this stateid if it's still open */
3006 if (!sp->rs_closed) {
3007 rfs4_dbe_lock(sp->rs_dbe);
3008 (void) rfs4_unshare(sp);
3009 rfs4_dbe_unlock(sp->rs_dbe);
3010 }
3011
3012 /* Were done with the file */
3013 rfs4_file_rele(sp->rs_finfo);
3014 sp->rs_finfo = NULL;
3015
3016 /* And now with the openowner */
3017 rfs4_openowner_rele(sp->rs_owner);
3018 sp->rs_owner = NULL;
3019 }
3020
3021 static void
rfs4_state_rele_nounlock(rfs4_state_t * sp)3022 rfs4_state_rele_nounlock(rfs4_state_t *sp)
3023 {
3024 rfs4_dbe_rele(sp->rs_dbe);
3025 }
3026
3027 void
rfs4_state_rele(rfs4_state_t * sp)3028 rfs4_state_rele(rfs4_state_t *sp)
3029 {
3030 rw_exit(&sp->rs_finfo->rf_file_rwlock);
3031 rfs4_dbe_rele(sp->rs_dbe);
3032 }
3033
3034 static uint32_t
deleg_hash(void * key)3035 deleg_hash(void *key)
3036 {
3037 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
3038
3039 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
3040 }
3041
3042 static bool_t
deleg_compare(rfs4_entry_t u_entry,void * key)3043 deleg_compare(rfs4_entry_t u_entry, void *key)
3044 {
3045 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3046 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
3047
3048 return (dsp->rds_client == kdsp->rds_client &&
3049 dsp->rds_finfo == kdsp->rds_finfo);
3050 }
3051
3052 static void *
deleg_mkkey(rfs4_entry_t u_entry)3053 deleg_mkkey(rfs4_entry_t u_entry)
3054 {
3055 return (u_entry);
3056 }
3057
3058 static uint32_t
deleg_state_hash(void * key)3059 deleg_state_hash(void *key)
3060 {
3061 stateid_t *ip = (stateid_t *)key;
3062
3063 return (ip->bits.ident);
3064 }
3065
3066 static bool_t
deleg_state_compare(rfs4_entry_t u_entry,void * key)3067 deleg_state_compare(rfs4_entry_t u_entry, void *key)
3068 {
3069 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3070 stateid_t *id = (stateid_t *)key;
3071 bool_t rc;
3072
3073 if (id->bits.type != DELEGID)
3074 return (FALSE);
3075
3076 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
3077 dsp->rds_delegid.bits.ident == id->bits.ident);
3078
3079 return (rc);
3080 }
3081
3082 static void *
deleg_state_mkkey(rfs4_entry_t u_entry)3083 deleg_state_mkkey(rfs4_entry_t u_entry)
3084 {
3085 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3086
3087 return (&dsp->rds_delegid);
3088 }
3089
3090 static bool_t
rfs4_deleg_state_expiry(rfs4_entry_t u_entry)3091 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3092 {
3093 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3094
3095 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3096 return (TRUE);
3097
3098 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3099 return (TRUE);
3100
3101 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3102 > rfs4_lease_time)) {
3103 rfs4_dbe_invalidate(dsp->rds_dbe);
3104 return (TRUE);
3105 }
3106
3107 return (FALSE);
3108 }
3109
3110 static bool_t
rfs4_deleg_state_create(rfs4_entry_t u_entry,void * argp)3111 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3112 {
3113 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3114 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3115 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3116
3117 rfs4_dbe_hold(fp->rf_dbe);
3118 rfs4_dbe_hold(cp->rc_dbe);
3119
3120 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3121 dsp->rds_delegid.bits.type = DELEGID;
3122 dsp->rds_finfo = fp;
3123 dsp->rds_client = cp;
3124 dsp->rds_dtype = OPEN_DELEGATE_NONE;
3125
3126 dsp->rds_time_granted = gethrestime_sec(); /* observability */
3127 dsp->rds_time_revoked = 0;
3128
3129 list_link_init(&dsp->rds_node);
3130
3131 return (TRUE);
3132 }
3133
3134 static void
rfs4_deleg_state_destroy(rfs4_entry_t u_entry)3135 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3136 {
3137 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3138
3139 /* return delegation if necessary */
3140 rfs4_return_deleg(dsp, FALSE);
3141
3142 /* Were done with the file */
3143 rfs4_file_rele(dsp->rds_finfo);
3144 dsp->rds_finfo = NULL;
3145
3146 /* And now with the openowner */
3147 rfs4_client_rele(dsp->rds_client);
3148 dsp->rds_client = NULL;
3149 }
3150
3151 rfs4_deleg_state_t *
rfs4_finddeleg(rfs4_state_t * sp,bool_t * create)3152 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3153 {
3154 rfs4_deleg_state_t ds, *dsp;
3155 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3156
3157 ds.rds_client = sp->rs_owner->ro_client;
3158 ds.rds_finfo = sp->rs_finfo;
3159
3160 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3161 create, &ds, RFS4_DBS_VALID);
3162
3163 return (dsp);
3164 }
3165
3166 rfs4_deleg_state_t *
rfs4_finddelegstate(stateid_t * id)3167 rfs4_finddelegstate(stateid_t *id)
3168 {
3169 rfs4_deleg_state_t *dsp;
3170 bool_t create = FALSE;
3171 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3172
3173 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3174 id, &create, NULL, RFS4_DBS_VALID);
3175
3176 return (dsp);
3177 }
3178
3179 void
rfs4_deleg_state_rele(rfs4_deleg_state_t * dsp)3180 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3181 {
3182 rfs4_dbe_rele(dsp->rds_dbe);
3183 }
3184
3185 void
rfs4_update_lock_sequence(rfs4_lo_state_t * lsp)3186 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3187 {
3188
3189 rfs4_dbe_lock(lsp->rls_dbe);
3190
3191 /*
3192 * If we are skipping sequence id checking, this means that
3193 * this is the first lock request and therefore the sequence
3194 * id does not need to be updated. This only happens on the
3195 * first lock request for a lockowner
3196 */
3197 if (!lsp->rls_skip_seqid_check)
3198 lsp->rls_seqid++;
3199
3200 rfs4_dbe_unlock(lsp->rls_dbe);
3201 }
3202
3203 void
rfs4_update_lock_resp(rfs4_lo_state_t * lsp,nfs_resop4 * resp)3204 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3205 {
3206
3207 rfs4_dbe_lock(lsp->rls_dbe);
3208
3209 rfs4_free_reply(&lsp->rls_reply);
3210
3211 rfs4_copy_reply(&lsp->rls_reply, resp);
3212
3213 rfs4_dbe_unlock(lsp->rls_dbe);
3214 }
3215
3216 void
rfs4_free_opens(rfs4_openowner_t * oo,bool_t invalidate,bool_t close_of_client)3217 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3218 bool_t close_of_client)
3219 {
3220 rfs4_state_t *sp;
3221
3222 rfs4_dbe_lock(oo->ro_dbe);
3223
3224 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3225 sp = list_next(&oo->ro_statelist, sp)) {
3226 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3227 if (invalidate == TRUE)
3228 rfs4_dbe_invalidate(sp->rs_dbe);
3229 }
3230
3231 rfs4_dbe_invalidate(oo->ro_dbe);
3232 rfs4_dbe_unlock(oo->ro_dbe);
3233 }
3234
3235 static uint32_t
state_owner_file_hash(void * key)3236 state_owner_file_hash(void *key)
3237 {
3238 rfs4_state_t *sp = key;
3239
3240 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3241 }
3242
3243 static bool_t
state_owner_file_compare(rfs4_entry_t u_entry,void * key)3244 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3245 {
3246 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3247 rfs4_state_t *arg = key;
3248
3249 if (sp->rs_closed == TRUE)
3250 return (FALSE);
3251
3252 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3253 }
3254
3255 static void *
state_owner_file_mkkey(rfs4_entry_t u_entry)3256 state_owner_file_mkkey(rfs4_entry_t u_entry)
3257 {
3258 return (u_entry);
3259 }
3260
3261 static uint32_t
state_file_hash(void * key)3262 state_file_hash(void *key)
3263 {
3264 return (ADDRHASH(key));
3265 }
3266
3267 static bool_t
state_file_compare(rfs4_entry_t u_entry,void * key)3268 state_file_compare(rfs4_entry_t u_entry, void *key)
3269 {
3270 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3271 rfs4_file_t *fp = key;
3272
3273 if (sp->rs_closed == TRUE)
3274 return (FALSE);
3275
3276 return (fp == sp->rs_finfo);
3277 }
3278
3279 static void *
state_file_mkkey(rfs4_entry_t u_entry)3280 state_file_mkkey(rfs4_entry_t u_entry)
3281 {
3282 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3283
3284 return (sp->rs_finfo);
3285 }
3286
3287 rfs4_state_t *
rfs4_findstate_by_owner_file(rfs4_openowner_t * oo,rfs4_file_t * fp,bool_t * create)3288 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3289 bool_t *create)
3290 {
3291 rfs4_state_t *sp;
3292 rfs4_state_t key;
3293 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3294
3295 key.rs_owner = oo;
3296 key.rs_finfo = fp;
3297
3298 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3299 &key, create, &key, RFS4_DBS_VALID);
3300
3301 return (sp);
3302 }
3303
3304 /* This returns ANY state struct that refers to this file */
3305 static rfs4_state_t *
rfs4_findstate_by_file(rfs4_file_t * fp)3306 rfs4_findstate_by_file(rfs4_file_t *fp)
3307 {
3308 bool_t create = FALSE;
3309 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3310
3311 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3312 &create, fp, RFS4_DBS_VALID));
3313 }
3314
3315 static bool_t
rfs4_state_expiry(rfs4_entry_t u_entry)3316 rfs4_state_expiry(rfs4_entry_t u_entry)
3317 {
3318 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3319
3320 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3321 return (TRUE);
3322
3323 if (sp->rs_closed == TRUE &&
3324 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3325 > rfs4_lease_time))
3326 return (TRUE);
3327
3328 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3329 > rfs4_lease_time));
3330 }
3331
3332 static bool_t
rfs4_state_create(rfs4_entry_t u_entry,void * argp)3333 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3334 {
3335 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3336 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3337 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3338
3339 rfs4_dbe_hold(fp->rf_dbe);
3340 rfs4_dbe_hold(oo->ro_dbe);
3341 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3342 sp->rs_stateid.bits.type = OPENID;
3343 sp->rs_owner = oo;
3344 sp->rs_finfo = fp;
3345
3346 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3347 offsetof(rfs4_lo_state_t, rls_node));
3348
3349 /* Insert state on per open owner's list */
3350 rfs4_dbe_lock(oo->ro_dbe);
3351 list_insert_tail(&oo->ro_statelist, sp);
3352 rfs4_dbe_unlock(oo->ro_dbe);
3353
3354 return (TRUE);
3355 }
3356
3357 static rfs4_state_t *
rfs4_findstate(stateid_t * id,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3358 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3359 {
3360 rfs4_state_t *sp;
3361 bool_t create = FALSE;
3362 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3363
3364 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3365 &create, NULL, find_invalid);
3366 if (lock_fp == TRUE && sp != NULL)
3367 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3368
3369 return (sp);
3370 }
3371
3372 void
rfs4_state_close(rfs4_state_t * sp,bool_t lock_held,bool_t close_of_client,cred_t * cr)3373 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3374 cred_t *cr)
3375 {
3376 /* Remove the associated lo_state owners */
3377 if (!lock_held)
3378 rfs4_dbe_lock(sp->rs_dbe);
3379
3380 /*
3381 * If refcnt == 0, the dbe is about to be destroyed.
3382 * lock state will be released by the reaper thread.
3383 */
3384
3385 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3386 if (sp->rs_closed == FALSE) {
3387 rfs4_release_share_lock_state(sp, cr, close_of_client);
3388 sp->rs_closed = TRUE;
3389 }
3390 }
3391
3392 if (!lock_held)
3393 rfs4_dbe_unlock(sp->rs_dbe);
3394 }
3395
3396 /*
3397 * Remove all state associated with the given client.
3398 */
3399 void
rfs4_client_state_remove(rfs4_client_t * cp)3400 rfs4_client_state_remove(rfs4_client_t *cp)
3401 {
3402 rfs4_openowner_t *oo;
3403
3404 rfs4_dbe_lock(cp->rc_dbe);
3405
3406 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3407 oo = list_next(&cp->rc_openownerlist, oo)) {
3408 rfs4_free_opens(oo, TRUE, TRUE);
3409 }
3410
3411 rfs4_dbe_unlock(cp->rc_dbe);
3412 }
3413
3414 void
rfs4_client_close(rfs4_client_t * cp)3415 rfs4_client_close(rfs4_client_t *cp)
3416 {
3417 /* Mark client as going away. */
3418 rfs4_dbe_lock(cp->rc_dbe);
3419 rfs4_dbe_invalidate(cp->rc_dbe);
3420 rfs4_dbe_unlock(cp->rc_dbe);
3421
3422 rfs4_client_state_remove(cp);
3423 rfs4x_client_session_remove(cp);
3424
3425 /* Release the client */
3426 rfs4_client_rele(cp);
3427 }
3428
3429 nfsstat4
rfs4_check_clientid(clientid4 * cp,int setclid_confirm)3430 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3431 {
3432 cid *cidp = (cid *) cp;
3433 nfs4_srv_t *nsrv4;
3434
3435 nsrv4 = nfs4_get_srv();
3436
3437 /*
3438 * If we are booted as a cluster node, check the embedded nodeid.
3439 * If it indicates that this clientid was generated on another node,
3440 * inform the client accordingly.
3441 */
3442 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3443 return (NFS4ERR_STALE_CLIENTID);
3444
3445 /*
3446 * If the server start time matches the time provided
3447 * by the client (via the clientid) and this is NOT a
3448 * setclientid_confirm then return EXPIRED.
3449 */
3450 if (!setclid_confirm &&
3451 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3452 return (NFS4ERR_EXPIRED);
3453
3454 return (NFS4ERR_STALE_CLIENTID);
3455 }
3456
3457 /*
3458 * This is used when a stateid has not been found amongst the
3459 * current server's state. Check the stateid to see if it
3460 * was from this server instantiation or not.
3461 */
3462 static nfsstat4
what_stateid_error(stateid_t * id,stateid_type_t type)3463 what_stateid_error(stateid_t *id, stateid_type_t type)
3464 {
3465 nfs4_srv_t *nsrv4;
3466
3467 nsrv4 = nfs4_get_srv();
3468
3469 /* If we are booted as a cluster node, was stateid locally generated? */
3470 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3471 return (NFS4ERR_STALE_STATEID);
3472
3473 /* If types don't match then no use checking further */
3474 if (type != id->bits.type)
3475 return (NFS4ERR_BAD_STATEID);
3476
3477 /* From a different server instantiation, return STALE */
3478 if (id->bits.boottime != nsrv4->rfs4_start_time)
3479 return (NFS4ERR_STALE_STATEID);
3480
3481 /*
3482 * From this server but the state is most likely beyond lease
3483 * timeout: return NFS4ERR_EXPIRED. However, there is the
3484 * case of a delegation stateid. For delegations, there is a
3485 * case where the state can be removed without the client's
3486 * knowledge/consent: revocation. In the case of delegation
3487 * revocation, the delegation state will be removed and will
3488 * not be found. If the client does something like a
3489 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3490 * that has been revoked, the server should return BAD_STATEID
3491 * instead of the more common EXPIRED error.
3492 */
3493 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3494 if (type == DELEGID)
3495 return (NFS4ERR_BAD_STATEID);
3496 else
3497 return (NFS4ERR_EXPIRED);
3498 }
3499
3500 return (NFS4ERR_BAD_STATEID);
3501 }
3502
3503 /*
3504 * Used later on to find the various state structs. When called from
3505 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3506 * taken (it is not needed) and helps on the read/write path with
3507 * respect to performance.
3508 */
3509 static nfsstat4
rfs4_get_state_lockit(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3510 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3511 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3512 {
3513 stateid_t *id = (stateid_t *)stateid;
3514 rfs4_state_t *sp;
3515
3516 *spp = NULL;
3517
3518 /* If we are booted as a cluster node, was stateid locally generated? */
3519 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3520 return (NFS4ERR_STALE_STATEID);
3521
3522 sp = rfs4_findstate(id, find_invalid, lock_fp);
3523 if (sp == NULL) {
3524 return (what_stateid_error(id, OPENID));
3525 }
3526
3527 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3528 if (lock_fp == TRUE)
3529 rfs4_state_rele(sp);
3530 else
3531 rfs4_state_rele_nounlock(sp);
3532 return (NFS4ERR_EXPIRED);
3533 }
3534
3535 *spp = sp;
3536
3537 return (NFS4_OK);
3538 }
3539
3540 nfsstat4
rfs4_get_state(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3541 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3542 rfs4_dbsearch_type_t find_invalid)
3543 {
3544 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3545 }
3546
3547 int
rfs4_check_stateid_seqid(rfs4_state_t * sp,stateid4 * stateid,const compound_state_t * cs)3548 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid,
3549 const compound_state_t *cs)
3550 {
3551 stateid_t *id = (stateid_t *)stateid;
3552 bool_t has_session = rfs4_has_session(cs);
3553
3554 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3555 return (NFS4_CHECK_STATEID_EXPIRED);
3556
3557 if (has_session && id->bits.chgseq == 0)
3558 return (NFS4_CHECK_STATEID_OKAY);
3559
3560 /* Stateid is some time in the future - that's bad */
3561 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3562 return (NFS4_CHECK_STATEID_BAD);
3563
3564 if (!has_session &&
3565 sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1) {
3566 return (NFS4_CHECK_STATEID_REPLAY);
3567 }
3568
3569 /* Stateid is some time in the past - that's old */
3570 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3571 return (NFS4_CHECK_STATEID_OLD);
3572
3573 /* Caller needs to know about confirmation before closure */
3574 if (sp->rs_owner->ro_need_confirm)
3575 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3576
3577 if (sp->rs_closed == TRUE)
3578 return (NFS4_CHECK_STATEID_CLOSED);
3579
3580 return (NFS4_CHECK_STATEID_OKAY);
3581 }
3582
3583 int
rfs4_check_lo_stateid_seqid(rfs4_lo_state_t * lsp,stateid4 * stateid,const compound_state_t * cs)3584 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid,
3585 const compound_state_t *cs)
3586 {
3587 stateid_t *id = (stateid_t *)stateid;
3588 bool_t has_session = rfs4_has_session(cs);
3589
3590 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3591 return (NFS4_CHECK_STATEID_EXPIRED);
3592
3593 if (has_session && id->bits.chgseq == 0)
3594 return (NFS4_CHECK_STATEID_OKAY);
3595
3596 /* Stateid is some time in the future - that's bad */
3597 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3598 return (NFS4_CHECK_STATEID_BAD);
3599
3600 if (!has_session &&
3601 lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1) {
3602 return (NFS4_CHECK_STATEID_REPLAY);
3603 }
3604
3605 /* Stateid is some time in the past - that's old */
3606 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3607 return (NFS4_CHECK_STATEID_OLD);
3608
3609 if (lsp->rls_state->rs_closed == TRUE)
3610 return (NFS4_CHECK_STATEID_CLOSED);
3611
3612 return (NFS4_CHECK_STATEID_OKAY);
3613 }
3614
3615 nfsstat4
rfs4_get_deleg_state(stateid4 * stateid,rfs4_deleg_state_t ** dspp)3616 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3617 {
3618 stateid_t *id = (stateid_t *)stateid;
3619 rfs4_deleg_state_t *dsp;
3620
3621 *dspp = NULL;
3622
3623 /* If we are booted as a cluster node, was stateid locally generated? */
3624 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3625 return (NFS4ERR_STALE_STATEID);
3626
3627 dsp = rfs4_finddelegstate(id);
3628 if (dsp == NULL) {
3629 return (what_stateid_error(id, DELEGID));
3630 }
3631
3632 if (rfs4_lease_expired(dsp->rds_client)) {
3633 rfs4_deleg_state_rele(dsp);
3634 return (NFS4ERR_EXPIRED);
3635 }
3636
3637 *dspp = dsp;
3638
3639 return (NFS4_OK);
3640 }
3641
3642 nfsstat4
rfs4_get_lo_state(stateid4 * stateid,rfs4_lo_state_t ** lspp,bool_t lock_fp)3643 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3644 {
3645 stateid_t *id = (stateid_t *)stateid;
3646 rfs4_lo_state_t *lsp;
3647
3648 *lspp = NULL;
3649
3650 /* If we are booted as a cluster node, was stateid locally generated? */
3651 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3652 return (NFS4ERR_STALE_STATEID);
3653
3654 lsp = rfs4_findlo_state(id, lock_fp);
3655 if (lsp == NULL) {
3656 return (what_stateid_error(id, LOCKID));
3657 }
3658
3659 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3660 rfs4_lo_state_rele(lsp, lock_fp);
3661 return (NFS4ERR_EXPIRED);
3662 }
3663
3664 *lspp = lsp;
3665
3666 return (NFS4_OK);
3667 }
3668
3669 static nfsstat4
rfs4_get_all_state(stateid4 * sid,rfs4_state_t ** spp,rfs4_deleg_state_t ** dspp,rfs4_lo_state_t ** lspp)3670 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3671 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3672 {
3673 rfs4_state_t *sp = NULL;
3674 rfs4_deleg_state_t *dsp = NULL;
3675 rfs4_lo_state_t *lsp = NULL;
3676 stateid_t *id;
3677 nfsstat4 status;
3678
3679 *spp = NULL; *dspp = NULL; *lspp = NULL;
3680
3681 id = (stateid_t *)sid;
3682 switch (id->bits.type) {
3683 case OPENID:
3684 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3685 break;
3686 case DELEGID:
3687 status = rfs4_get_deleg_state(sid, &dsp);
3688 break;
3689 case LOCKID:
3690 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3691 if (status == NFS4_OK) {
3692 sp = lsp->rls_state;
3693 rfs4_dbe_hold(sp->rs_dbe);
3694 }
3695 break;
3696 default:
3697 status = NFS4ERR_BAD_STATEID;
3698 }
3699
3700 if (status == NFS4_OK) {
3701 *spp = sp;
3702 *dspp = dsp;
3703 *lspp = lsp;
3704 }
3705
3706 return (status);
3707 }
3708
3709 /*
3710 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3711 * rfs4_state_t struct has access to do this operation and if so
3712 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3713 */
3714 nfsstat4
rfs4_state_has_access(rfs4_state_t * sp,int mode,vnode_t * vp)3715 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3716 {
3717 nfsstat4 stat = NFS4_OK;
3718 rfs4_file_t *fp;
3719 bool_t create = FALSE;
3720
3721 rfs4_dbe_lock(sp->rs_dbe);
3722 if (mode == FWRITE) {
3723 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3724 stat = NFS4ERR_OPENMODE;
3725 }
3726 } else if (mode == FREAD) {
3727 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3728 /*
3729 * If we have OPENed the file with DENYing access
3730 * to both READ and WRITE then no one else could
3731 * have OPENed the file, hence no conflicting READ
3732 * deny. This check is merely an optimization.
3733 */
3734 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3735 goto out;
3736
3737 /* Check against file struct's DENY mode */
3738 fp = rfs4_findfile(vp, NULL, &create);
3739 if (fp != NULL) {
3740 int deny_read = 0;
3741 rfs4_dbe_lock(fp->rf_dbe);
3742 /*
3743 * Check if any other open owner has the file
3744 * OPENed with deny READ.
3745 */
3746 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3747 deny_read = 1;
3748 ASSERT(fp->rf_deny_read >= deny_read);
3749 if (fp->rf_deny_read > deny_read)
3750 stat = NFS4ERR_OPENMODE;
3751 rfs4_dbe_unlock(fp->rf_dbe);
3752 rfs4_file_rele(fp);
3753 }
3754 }
3755 } else {
3756 /* Illegal I/O mode */
3757 stat = NFS4ERR_INVAL;
3758 }
3759 out:
3760 rfs4_dbe_unlock(sp->rs_dbe);
3761 return (stat);
3762 }
3763
3764 static nfsstat4
check_state_seqid(stateid_t * st,stateid_t * in,bool_t has_session)3765 check_state_seqid(stateid_t *st, stateid_t *in, bool_t has_session)
3766 {
3767 /* rfc56661, section 8.2.2, "seqid to zero" */
3768 if (has_session && in->bits.chgseq == 0)
3769 return (NFS4_OK);
3770
3771 /* Seqid in the future? - that's bad */
3772 if (st->bits.chgseq < in->bits.chgseq)
3773 return (NFS4ERR_BAD_STATEID);
3774
3775 /* Seqid in the past? - that's old */
3776 if (st->bits.chgseq > in->bits.chgseq)
3777 return (NFS4ERR_OLD_STATEID);
3778
3779 return (NFS4_OK);
3780 }
3781
3782 /*
3783 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3784 * the file is being truncated, return NFS4_OK if allowed or appropriate
3785 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3786 * the associated file will be done if the I/O is not consistent with any
3787 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3788 * as reader or writer as appropriate. rfs4_op_open will acquire the
3789 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3790 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3791 * deleg parameter, we will return whether a write delegation is held by
3792 * the client associated with this stateid.
3793 * If the server instance associated with the relevant client is in its
3794 * grace period, return NFS4ERR_GRACE.
3795 */
3796
3797 nfsstat4
rfs4_check_stateid(int mode,vnode_t * vp,stateid4 * stateid,bool_t trunc,bool_t * deleg,bool_t do_access,caller_context_t * ct,compound_state_t * cs)3798 rfs4_check_stateid(int mode, vnode_t *vp,
3799 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3800 bool_t do_access, caller_context_t *ct, compound_state_t *cs)
3801 {
3802 rfs4_file_t *fp;
3803 bool_t create = FALSE;
3804 rfs4_state_t *sp;
3805 rfs4_deleg_state_t *dsp;
3806 rfs4_lo_state_t *lsp;
3807 stateid_t *id = (stateid_t *)stateid;
3808 nfsstat4 stat = NFS4_OK;
3809 bool_t use_ss = rfs4_has_session(cs);
3810
3811 if (ct != NULL) {
3812 ct->cc_sysid = 0;
3813 ct->cc_pid = 0;
3814 ct->cc_caller_id = nfs4_srv_caller_id;
3815 ct->cc_flags = CC_DONTBLOCK;
3816 }
3817
3818 if (ISSPECIAL(stateid)) {
3819 fp = rfs4_findfile(vp, NULL, &create);
3820 if (fp == NULL)
3821 return (NFS4_OK);
3822
3823 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3824 rfs4_file_rele(fp);
3825 return (NFS4_OK);
3826 }
3827 if (mode == FWRITE ||
3828 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3829 rfs4_recall_deleg(fp, trunc, NULL);
3830 rfs4_file_rele(fp);
3831 return (NFS4ERR_DELAY);
3832 }
3833 rfs4_file_rele(fp);
3834 return (NFS4_OK);
3835 } else {
3836 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3837 if (stat != NFS4_OK)
3838 return (stat);
3839
3840 if (lsp != NULL) {
3841 /* Is associated server instance in its grace period? */
3842 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3843 rfs4_lo_state_rele(lsp, FALSE);
3844 if (sp != NULL)
3845 rfs4_state_rele_nounlock(sp);
3846 return (NFS4ERR_GRACE);
3847 }
3848
3849 ASSERT(id->bits.type == LOCKID);
3850 stat = check_state_seqid(&lsp->rls_lockid, id, use_ss);
3851 if (stat) {
3852 rfs4_lo_state_rele(lsp, FALSE);
3853 if (sp)
3854 rfs4_state_rele_nounlock(sp);
3855 return (stat);
3856 }
3857
3858 /* Ensure specified filehandle matches */
3859 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3860 rfs4_lo_state_rele(lsp, FALSE);
3861 if (sp != NULL)
3862 rfs4_state_rele_nounlock(sp);
3863 return (NFS4ERR_BAD_STATEID);
3864 }
3865
3866 if (ct != NULL) {
3867 ct->cc_sysid =
3868 lsp->rls_locker->rl_client->rc_sysidt;
3869 ct->cc_pid = lsp->rls_locker->rl_pid;
3870 }
3871 rfs4_lo_state_rele(lsp, FALSE);
3872 }
3873
3874 /* Stateid provided was an "open" stateid */
3875 if (sp != NULL) {
3876 /* Is associated server instance in its grace period? */
3877 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3878 rfs4_state_rele_nounlock(sp);
3879 return (NFS4ERR_GRACE);
3880 }
3881 /* Skip if is here via the LOCKID */
3882 if (id->bits.type == OPENID) {
3883 stat = check_state_seqid(&sp->rs_stateid, id,
3884 use_ss);
3885 if (stat) {
3886 rfs4_state_rele_nounlock(sp);
3887 return (stat);
3888 }
3889 }
3890 /* Ensure specified filehandle matches */
3891 if (sp->rs_finfo->rf_vp != vp) {
3892 rfs4_state_rele_nounlock(sp);
3893 return (NFS4ERR_BAD_STATEID);
3894 }
3895
3896 if (sp->rs_owner->ro_need_confirm) {
3897 rfs4_state_rele_nounlock(sp);
3898 return (NFS4ERR_BAD_STATEID);
3899 }
3900
3901 if (sp->rs_closed == TRUE) {
3902 rfs4_state_rele_nounlock(sp);
3903 return (NFS4ERR_OLD_STATEID);
3904 }
3905
3906 if (do_access)
3907 stat = rfs4_state_has_access(sp, mode, vp);
3908 else
3909 stat = NFS4_OK;
3910
3911 /*
3912 * Return whether this state has write
3913 * delegation if desired
3914 */
3915 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3916 OPEN_DELEGATE_WRITE))
3917 *deleg = TRUE;
3918
3919 /*
3920 * We got a valid stateid, so we update the
3921 * lease on the client. Ideally we would like
3922 * to do this after the calling op succeeds,
3923 * but for now this will be good
3924 * enough. Callers of this routine are
3925 * currently insulated from the state stuff.
3926 */
3927 rfs4_update_lease(sp->rs_owner->ro_client);
3928
3929 /*
3930 * If a delegation is present on this file and
3931 * this is a WRITE, then update the lastwrite
3932 * time to indicate that activity is present.
3933 */
3934 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3935 OPEN_DELEGATE_WRITE &&
3936 mode == FWRITE) {
3937 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3938 gethrestime_sec();
3939 }
3940
3941 /* Fill context for possible nbmand check */
3942 if (ct != NULL && ct->cc_pid == 0) {
3943 ct->cc_sysid =
3944 sp->rs_owner->ro_client->rc_sysidt;
3945 ct->cc_pid =
3946 rfs4_dbe_getid(sp->rs_owner->ro_dbe);
3947 }
3948
3949 rfs4_state_rele_nounlock(sp);
3950
3951 return (stat);
3952 }
3953
3954 if (dsp != NULL) {
3955 /* Is associated server instance in its grace period? */
3956 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3957 rfs4_deleg_state_rele(dsp);
3958 return (NFS4ERR_GRACE);
3959 }
3960
3961 stat = check_state_seqid(&dsp->rds_delegid, id, use_ss);
3962 if (stat) {
3963 rfs4_deleg_state_rele(dsp);
3964 return (stat);
3965 }
3966
3967 /* Ensure specified filehandle matches */
3968 if (dsp->rds_finfo->rf_vp != vp) {
3969 rfs4_deleg_state_rele(dsp);
3970 return (NFS4ERR_BAD_STATEID);
3971 }
3972 /*
3973 * Return whether this state has write
3974 * delegation if desired
3975 */
3976 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3977 OPEN_DELEGATE_WRITE))
3978 *deleg = TRUE;
3979
3980 rfs4_update_lease(dsp->rds_client);
3981
3982 /*
3983 * If a delegation is present on this file and
3984 * this is a WRITE, then update the lastwrite
3985 * time to indicate that activity is present.
3986 */
3987 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3988 OPEN_DELEGATE_WRITE && mode == FWRITE) {
3989 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3990 gethrestime_sec();
3991 }
3992
3993 /*
3994 * XXX - what happens if this is a WRITE and the
3995 * delegation type of for READ.
3996 */
3997 rfs4_deleg_state_rele(dsp);
3998
3999 return (stat);
4000 }
4001 /*
4002 * If we got this far, something bad happened
4003 */
4004 return (NFS4ERR_BAD_STATEID);
4005 }
4006 }
4007
4008
4009 /*
4010 * This is a special function in that for the file struct provided the
4011 * server wants to remove/close all current state associated with the
4012 * file. The prime use of this would be with OP_REMOVE to force the
4013 * release of state and particularly of file locks.
4014 *
4015 * There is an assumption that there is no delegations outstanding on
4016 * this file at this point. The caller should have waited for those
4017 * to be returned or revoked.
4018 */
4019 void
rfs4_close_all_state(rfs4_file_t * fp)4020 rfs4_close_all_state(rfs4_file_t *fp)
4021 {
4022 rfs4_state_t *sp;
4023
4024 rfs4_dbe_lock(fp->rf_dbe);
4025
4026 #ifdef DEBUG
4027 /* only applies when server is handing out delegations */
4028 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
4029 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
4030 #endif
4031
4032 /* No delegations for this file */
4033 ASSERT(list_is_empty(&fp->rf_delegstatelist));
4034
4035 /* Make sure that it can not be found */
4036 rfs4_dbe_invalidate(fp->rf_dbe);
4037
4038 if (fp->rf_vp == NULL) {
4039 rfs4_dbe_unlock(fp->rf_dbe);
4040 return;
4041 }
4042 rfs4_dbe_unlock(fp->rf_dbe);
4043
4044 /*
4045 * Hold as writer to prevent other server threads from
4046 * processing requests related to the file while all state is
4047 * being removed.
4048 */
4049 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
4050
4051 /* Remove ALL state from the file */
4052 while ((sp = rfs4_findstate_by_file(fp)) != NULL) {
4053 rfs4_state_close(sp, FALSE, FALSE, CRED());
4054 rfs4_state_rele_nounlock(sp);
4055 }
4056
4057 /*
4058 * This is only safe since there are no further references to
4059 * the file.
4060 */
4061 rfs4_dbe_lock(fp->rf_dbe);
4062 if (fp->rf_vp) {
4063 vnode_t *vp = fp->rf_vp;
4064
4065 mutex_enter(&vp->v_vsd_lock);
4066 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4067 mutex_exit(&vp->v_vsd_lock);
4068 VN_RELE(vp);
4069 fp->rf_vp = NULL;
4070 }
4071 rfs4_dbe_unlock(fp->rf_dbe);
4072
4073 /* Finally let other references to proceed */
4074 rw_exit(&fp->rf_file_rwlock);
4075 }
4076
4077 /*
4078 * This function is used as a target for the rfs4_dbe_walk() call
4079 * below. The purpose of this function is to see if the
4080 * lockowner_state refers to a file that resides within the exportinfo
4081 * export. If so, then remove the lock_owner state (file locks and
4082 * share "locks") for this object since the intent is the server is
4083 * unexporting the specified directory. Be sure to invalidate the
4084 * object after the state has been released
4085 */
4086 static void
rfs4_lo_state_walk_callout(rfs4_entry_t u_entry,void * e)4087 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
4088 {
4089 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
4090 struct exportinfo *exi = (struct exportinfo *)e;
4091 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4092 fhandle_t *efhp;
4093
4094 efhp = (fhandle_t *)&exi->exi_fh;
4095 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4096
4097 FH_TO_FMT4(efhp, exi_fhp);
4098
4099 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
4100 rf_filehandle.nfs_fh4_val;
4101
4102 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4103 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4104 exi_fhp->fh4_xlen) == 0) {
4105 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
4106 rfs4_dbe_invalidate(lsp->rls_dbe);
4107 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
4108 }
4109 }
4110
4111 /*
4112 * This function is used as a target for the rfs4_dbe_walk() call
4113 * below. The purpose of this function is to see if the state refers
4114 * to a file that resides within the exportinfo export. If so, then
4115 * remove the open state for this object since the intent is the
4116 * server is unexporting the specified directory. The main result for
4117 * this type of entry is to invalidate it such it will not be found in
4118 * the future.
4119 */
4120 static void
rfs4_state_walk_callout(rfs4_entry_t u_entry,void * e)4121 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4122 {
4123 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4124 struct exportinfo *exi = (struct exportinfo *)e;
4125 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4126 fhandle_t *efhp;
4127
4128 efhp = (fhandle_t *)&exi->exi_fh;
4129 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4130
4131 FH_TO_FMT4(efhp, exi_fhp);
4132
4133 finfo_fhp =
4134 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4135
4136 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4137 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4138 exi_fhp->fh4_xlen) == 0) {
4139 rfs4_state_close(sp, TRUE, FALSE, CRED());
4140 rfs4_dbe_invalidate(sp->rs_dbe);
4141 }
4142 }
4143
4144 /*
4145 * This function is used as a target for the rfs4_dbe_walk() call
4146 * below. The purpose of this function is to see if the state refers
4147 * to a file that resides within the exportinfo export. If so, then
4148 * remove the deleg state for this object since the intent is the
4149 * server is unexporting the specified directory. The main result for
4150 * this type of entry is to invalidate it such it will not be found in
4151 * the future.
4152 */
4153 static void
rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry,void * e)4154 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4155 {
4156 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4157 struct exportinfo *exi = (struct exportinfo *)e;
4158 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4159 fhandle_t *efhp;
4160
4161 efhp = (fhandle_t *)&exi->exi_fh;
4162 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4163
4164 FH_TO_FMT4(efhp, exi_fhp);
4165
4166 finfo_fhp =
4167 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4168
4169 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4170 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4171 exi_fhp->fh4_xlen) == 0) {
4172 rfs4_dbe_invalidate(dsp->rds_dbe);
4173 }
4174 }
4175
4176 /*
4177 * This function is used as a target for the rfs4_dbe_walk() call
4178 * below. The purpose of this function is to see if the state refers
4179 * to a file that resides within the exportinfo export. If so, then
4180 * release vnode hold for this object since the intent is the server
4181 * is unexporting the specified directory. Invalidation will prevent
4182 * this struct from being found in the future.
4183 */
4184 static void
rfs4_file_walk_callout(rfs4_entry_t u_entry,void * e)4185 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4186 {
4187 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4188 struct exportinfo *exi = (struct exportinfo *)e;
4189 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4190 fhandle_t *efhp;
4191
4192 efhp = (fhandle_t *)&exi->exi_fh;
4193 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4194
4195 FH_TO_FMT4(efhp, exi_fhp);
4196
4197 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4198
4199 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4200 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4201 exi_fhp->fh4_xlen) == 0) {
4202 if (fp->rf_vp) {
4203 vnode_t *vp = fp->rf_vp;
4204
4205 /*
4206 * don't leak monitors and remove the reference
4207 * put on the vnode when the delegation was granted.
4208 */
4209 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4210 (void) fem_uninstall(vp, deleg_rdops,
4211 (void *)fp);
4212 vn_open_downgrade(vp, FREAD);
4213 } else if (fp->rf_dinfo.rd_dtype ==
4214 OPEN_DELEGATE_WRITE) {
4215 (void) fem_uninstall(vp, deleg_wrops,
4216 (void *)fp);
4217 vn_open_downgrade(vp, FREAD|FWRITE);
4218 }
4219 mutex_enter(&vp->v_vsd_lock);
4220 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4221 mutex_exit(&vp->v_vsd_lock);
4222 VN_RELE(vp);
4223 fp->rf_vp = NULL;
4224 }
4225 rfs4_dbe_invalidate(fp->rf_dbe);
4226 }
4227 }
4228
4229 /*
4230 * Given a directory that is being unexported, cleanup/release all
4231 * state in the server that refers to objects residing underneath this
4232 * particular export. The ordering of the release is important.
4233 * Lock_owner, then state and then file.
4234 *
4235 * NFS zones note: nfs_export.c:unexport() calls this from a
4236 * thread in the global zone for NGZ data structures, so we
4237 * CANNOT use zone_getspecific anywhere in this code path.
4238 */
4239 void
rfs4_clean_state_exi(nfs_export_t * ne,struct exportinfo * exi)4240 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4241 {
4242 nfs_globals_t *ng;
4243 nfs4_srv_t *nsrv4;
4244
4245 ng = ne->ne_globals;
4246 ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4247 nsrv4 = ng->nfs4_srv;
4248
4249 mutex_enter(&nsrv4->state_lock);
4250
4251 if (nsrv4->nfs4_server_state == NULL) {
4252 mutex_exit(&nsrv4->state_lock);
4253 return;
4254 }
4255
4256 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4257 rfs4_lo_state_walk_callout, exi);
4258 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4259 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4260 rfs4_deleg_state_walk_callout, exi);
4261 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4262
4263 mutex_exit(&nsrv4->state_lock);
4264 }
4265