1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26 /*
27 * Copyright 2018 Nexenta Systems, Inc.
28 * Copyright 2019 Nexenta by DDN, Inc.
29 * Copyright 2020 RackTop Systems, Inc.
30 * Copyright 2023 MNX Cloud, Inc.
31 */
32
33 #include <sys/systm.h>
34 #include <sys/kmem.h>
35 #include <sys/cmn_err.h>
36 #include <sys/atomic.h>
37 #include <sys/clconf.h>
38 #include <sys/cladm.h>
39 #include <sys/flock.h>
40 #include <nfs/export.h>
41 #include <nfs/nfs.h>
42 #include <nfs/nfs4.h>
43 #include <nfs/nfssys.h>
44 #include <nfs/lm.h>
45 #include <sys/pathname.h>
46 #include <sys/sdt.h>
47 #include <sys/nvpair.h>
48
49 extern u_longlong_t nfs4_srv_caller_id;
50
51 extern uint_t nfs4_srv_vkey;
52
/*
 * Well-known stateid values.  The ZERO_STATEID/ONE_STATEID/CURRENT_STATEID
 * macros compare a caller-supplied stateid against these byte-for-byte.
 */
stateid4 zero_stateid; /* all zeros */
/* seqid and all twelve "other" elements initialized to ~0 */
stateid4 one_stateid = {
	.seqid = ~0,
	.other = { ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0, ~0 }
};
/* seqid of 1, "other" left zero; get_stateid4() matches against this */
stateid4 current_stateid = {
	.seqid = 1
};
/* seqid of ~0, "other" left zero */
stateid4 invalid_stateid = {
	.seqid = ~0
};
64
/*
 * Predicates for the well-known stateids: each evaluates to non-zero when
 * the stateid4 pointed to by x is byte-for-byte equal to the named constant.
 */
#define	ZERO_STATEID(x) (!memcmp((x), &zero_stateid, sizeof (stateid4)))
#define	ONE_STATEID(x) (!memcmp((x), &one_stateid, sizeof (stateid4)))
#define	CURRENT_STATEID(x) (!memcmp((x), &current_stateid, sizeof (stateid4)))
68
/*
 * For embedding the cluster nodeid into our clientid.
 * NOTE(review): neither macro is used in the part of the file visible
 * here; presumably the nodeid is shifted by CLUSTER_NODEID_SHIFT and
 * capped at CLUSTER_MAX_NODEID -- confirm at the point of use.
 */
#define CLUSTER_NODEID_SHIFT 24
#define CLUSTER_MAX_NODEID 255

/* rfs4_debug exists only when the kernel is built with DEBUG defined. */
#ifdef DEBUG
int rfs4_debug;
#endif

/*
 * NOTE(review): consumers of these two are outside the visible part of
 * the file; the table has RFS4_DB_MEM_CACHE_NUM slots and the debug word
 * starts out as zero.
 */
rfs4_db_mem_cache_t rfs4_db_mem_cache_table[RFS4_DB_MEM_CACHE_NUM];
static uint32_t rfs4_database_debug = 0x00;
79
/*
 * Forward declarations for stable-storage helpers that are defined
 * further down in this file but used before their definitions.
 */
/* CSTYLED */
static void rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf);
static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
85
86 void
put_stateid4(struct compound_state * cs,stateid4 * state)87 put_stateid4(struct compound_state *cs, stateid4 *state)
88 {
89 if (*cs->statusp == NFS4_OK && cs->minorversion) {
90 memcpy(&cs->current_stateid, state, sizeof (stateid4));
91 cs->cs_flags |= RFS4_CURRENT_STATEID;
92 }
93 }
94
95 void
get_stateid4(struct compound_state * cs,stateid4 * state)96 get_stateid4(struct compound_state *cs, stateid4 *state)
97 {
98 if ((cs->cs_flags & RFS4_CURRENT_STATEID) && CURRENT_STATEID(state)) {
99 memcpy(state, &cs->current_stateid, sizeof (stateid4));
100 }
101 }
102
103 /*
104 * Couple of simple init/destroy functions for a general waiter
105 */
106 void
rfs4_sw_init(rfs4_state_wait_t * swp)107 rfs4_sw_init(rfs4_state_wait_t *swp)
108 {
109 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
110 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
111 swp->sw_active = FALSE;
112 swp->sw_wait_count = 0;
113 }
114
115 void
rfs4_sw_destroy(rfs4_state_wait_t * swp)116 rfs4_sw_destroy(rfs4_state_wait_t *swp)
117 {
118 mutex_destroy(swp->sw_cv_lock);
119 cv_destroy(swp->sw_cv);
120 }
121
122 void
rfs4_sw_enter(rfs4_state_wait_t * swp)123 rfs4_sw_enter(rfs4_state_wait_t *swp)
124 {
125 mutex_enter(swp->sw_cv_lock);
126 while (swp->sw_active) {
127 swp->sw_wait_count++;
128 cv_wait(swp->sw_cv, swp->sw_cv_lock);
129 swp->sw_wait_count--;
130 }
131 ASSERT(swp->sw_active == FALSE);
132 swp->sw_active = TRUE;
133 mutex_exit(swp->sw_cv_lock);
134 }
135
136 void
rfs4_sw_exit(rfs4_state_wait_t * swp)137 rfs4_sw_exit(rfs4_state_wait_t *swp)
138 {
139 mutex_enter(swp->sw_cv_lock);
140 ASSERT(swp->sw_active == TRUE);
141 swp->sw_active = FALSE;
142 if (swp->sw_wait_count != 0)
143 cv_broadcast(swp->sw_cv);
144 mutex_exit(swp->sw_cv_lock);
145 }
146
147 static void
deep_lock_copy(LOCK4res * dres,LOCK4res * sres)148 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
149 {
150 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
151 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
152
153 if (sres->status == NFS4ERR_DENIED) {
154 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
155 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
156 }
157 }
158
159 /*
160 * CPR callback id -- not related to v4 callbacks
161 */
162 static callb_id_t cpr_id = 0;
163
164 static void
deep_lock_free(LOCK4res * res)165 deep_lock_free(LOCK4res *res)
166 {
167 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
168
169 if (res->status == NFS4ERR_DENIED)
170 kmem_free(lo->owner_val, lo->owner_len);
171 }
172
173 static void
deep_open_copy(OPEN4res * dres,OPEN4res * sres)174 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
175 {
176 nfsace4 *sacep, *dacep;
177
178 if (sres->status != NFS4_OK) {
179 return;
180 }
181
182 dres->attrset = sres->attrset;
183
184 switch (sres->delegation.delegation_type) {
185 case OPEN_DELEGATE_NONE:
186 return;
187 case OPEN_DELEGATE_READ:
188 sacep = &sres->delegation.open_delegation4_u.read.permissions;
189 dacep = &dres->delegation.open_delegation4_u.read.permissions;
190 break;
191 case OPEN_DELEGATE_WRITE:
192 sacep = &sres->delegation.open_delegation4_u.write.permissions;
193 dacep = &dres->delegation.open_delegation4_u.write.permissions;
194 break;
195 }
196 dacep->who.utf8string_val =
197 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
198 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
199 sacep->who.utf8string_len);
200 }
201
202 static void
deep_open_free(OPEN4res * res)203 deep_open_free(OPEN4res *res)
204 {
205 nfsace4 *acep;
206 if (res->status != NFS4_OK)
207 return;
208
209 switch (res->delegation.delegation_type) {
210 case OPEN_DELEGATE_NONE:
211 return;
212 case OPEN_DELEGATE_READ:
213 acep = &res->delegation.open_delegation4_u.read.permissions;
214 break;
215 case OPEN_DELEGATE_WRITE:
216 acep = &res->delegation.open_delegation4_u.write.permissions;
217 break;
218 }
219
220 if (acep->who.utf8string_val) {
221 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
222 acep->who.utf8string_val = NULL;
223 }
224 }
225
226 void
rfs4_free_reply(nfs_resop4 * rp)227 rfs4_free_reply(nfs_resop4 *rp)
228 {
229 switch (rp->resop) {
230 case OP_LOCK:
231 deep_lock_free(&rp->nfs_resop4_u.oplock);
232 break;
233 case OP_OPEN:
234 deep_open_free(&rp->nfs_resop4_u.opopen);
235 default:
236 break;
237 }
238 }
239
240 void
rfs4_copy_reply(nfs_resop4 * dst,nfs_resop4 * src)241 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
242 {
243 *dst = *src;
244
245 /* Handle responses that need deep copy */
246 switch (src->resop) {
247 case OP_LOCK:
248 deep_lock_copy(&dst->nfs_resop4_u.oplock,
249 &src->nfs_resop4_u.oplock);
250 break;
251 case OP_OPEN:
252 deep_open_copy(&dst->nfs_resop4_u.opopen,
253 &src->nfs_resop4_u.opopen);
254 break;
255 default:
256 break;
257 };
258 }
259
260 /*
261 * This is the implementation of the underlying state engine. The
262 * public interface to this engine is described by
263 * nfs4_state.h. Callers to the engine should hold no state engine
264 * locks when they call in to it. If the protocol needs to lock data
265 * structures it should do so after acquiring all references to them
266 * first and then follow the following lock order:
267 *
268 * client > openowner > state > lo_state > lockowner > file.
269 *
270 * Internally we only allow a thread to hold one hash bucket lock at a
271 * time and the lock is higher in the lock order (must be acquired
272 * first) than the data structure that is on that hash list.
273 *
274 * If a new reference was acquired by the caller, that reference needs
275 * to be released after releasing all acquired locks with the
276 * corresponding rfs4_*_rele routine.
277 */
278
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizeable tables
 * backed by the kmem_cache allocator. However, synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
290
/*
 * TABSIZE is 17 when built with DEBUG and 2047 otherwise.
 * NOTE(review): presumably the default hash table size; its users are
 * outside the visible part of the file -- confirm.
 */
#ifdef DEBUG
#define TABSIZE 17
#else
#define TABSIZE 2047
#endif

/* Hash an address-like key: cast to unsigned long and drop the low 3 bits. */
#define ADDRHASH(key) ((unsigned long)(key) >> 3)
298
/*
 * 1M entries.  The expansion is parenthesized so the macro behaves as a
 * single value inside larger expressions (e.g. x / MAXTABSZ).
 */
#define	MAXTABSZ	(1024*1024)
300
/*
 * The values below are rfs4_lease_time units.
 *
 * DEBUG builds use shorter times for the client, openowner, lockowner
 * and file entries; the state, lo_state and deleg_state values are the
 * same in both builds.
 */

#ifdef DEBUG
#define CLIENT_CACHE_TIME 1
#define OPENOWNER_CACHE_TIME 1
#define STATE_CACHE_TIME 1
#define LO_STATE_CACHE_TIME 1
#define LOCKOWNER_CACHE_TIME 1
#define FILE_CACHE_TIME 3
#define DELEG_STATE_CACHE_TIME 1
#else
#define CLIENT_CACHE_TIME 10
#define OPENOWNER_CACHE_TIME 5
#define STATE_CACHE_TIME 1
#define LO_STATE_CACHE_TIME 1
#define LOCKOWNER_CACHE_TIME 3
#define FILE_CACHE_TIME 40
#define DELEG_STATE_CACHE_TIME 1
#endif
320
/*
 * NFSv4 server state databases
 *
 * Initialized when the module is loaded and used by NFSv4 state tables.
 * These kmem_cache databases are global, the tables that make use of these
 * are per zone.
 */
kmem_cache_t *rfs4_client_mem_cache;
kmem_cache_t *rfs4_clntIP_mem_cache;
kmem_cache_t *rfs4_openown_mem_cache;
kmem_cache_t *rfs4_openstID_mem_cache;
kmem_cache_t *rfs4_lockstID_mem_cache;
kmem_cache_t *rfs4_lockown_mem_cache;
kmem_cache_t *rfs4_file_mem_cache;
kmem_cache_t *rfs4_delegstID_mem_cache;
kmem_cache_t *rfs4_session_mem_cache;
337
338 /*
339 * NFSv4 state table functions
340 */
/*
 * NOTE(review): the definitions and the table setup that uses these are
 * outside the visible part of the file.  Judging by the names, each
 * group holds an entry's create/destroy/expiry callbacks followed by a
 * hash/compare/mkkey triple per index -- confirm against the table
 * creation code.
 */

/* client entries (indexed by clientid and by nfs_client_id) */
static bool_t rfs4_client_create(rfs4_entry_t, void *);
static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
static void rfs4_client_destroy(rfs4_entry_t);
static bool_t rfs4_client_expiry(rfs4_entry_t);
static uint32_t clientid_hash(void *);
static bool_t clientid_compare(rfs4_entry_t, void *);
static void *clientid_mkkey(rfs4_entry_t);
static uint32_t nfsclnt_hash(void *);
static bool_t nfsclnt_compare(rfs4_entry_t, void *);
static void *nfsclnt_mkkey(rfs4_entry_t);
/* client IP entries */
static bool_t rfs4_clntip_expiry(rfs4_entry_t);
static void rfs4_clntip_destroy(rfs4_entry_t);
static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
static uint32_t clntip_hash(void *);
static bool_t clntip_compare(rfs4_entry_t, void *);
static void *clntip_mkkey(rfs4_entry_t);
/* open owner entries */
static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
static void rfs4_openowner_destroy(rfs4_entry_t);
static bool_t rfs4_openowner_expiry(rfs4_entry_t);
static uint32_t openowner_hash(void *);
static bool_t openowner_compare(rfs4_entry_t, void *);
static void *openowner_mkkey(rfs4_entry_t);
/* open state entries */
static bool_t rfs4_state_create(rfs4_entry_t, void *);
static void rfs4_state_destroy(rfs4_entry_t);
static bool_t rfs4_state_expiry(rfs4_entry_t);
static uint32_t state_hash(void *);
static bool_t state_compare(rfs4_entry_t, void *);
static void *state_mkkey(rfs4_entry_t);
static uint32_t state_owner_file_hash(void *);
static bool_t state_owner_file_compare(rfs4_entry_t, void *);
static void *state_owner_file_mkkey(rfs4_entry_t);
static uint32_t state_file_hash(void *);
static bool_t state_file_compare(rfs4_entry_t, void *);
static void *state_file_mkkey(rfs4_entry_t);
/* lock state entries */
static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
static void rfs4_lo_state_destroy(rfs4_entry_t);
static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
static uint32_t lo_state_hash(void *);
static bool_t lo_state_compare(rfs4_entry_t, void *);
static void *lo_state_mkkey(rfs4_entry_t);
static uint32_t lo_state_lo_hash(void *);
static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
static void *lo_state_lo_mkkey(rfs4_entry_t);
/* lock owner entries */
static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
static void rfs4_lockowner_destroy(rfs4_entry_t);
static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
static uint32_t lockowner_hash(void *);
static bool_t lockowner_compare(rfs4_entry_t, void *);
static void *lockowner_mkkey(rfs4_entry_t);
static uint32_t pid_hash(void *);
static bool_t pid_compare(rfs4_entry_t, void *);
static void *pid_mkkey(rfs4_entry_t);
/* file entries */
static bool_t rfs4_file_create(rfs4_entry_t, void *);
static void rfs4_file_destroy(rfs4_entry_t);
static uint32_t file_hash(void *);
static bool_t file_compare(rfs4_entry_t, void *);
static void *file_mkkey(rfs4_entry_t);
/* delegation state entries */
static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
static void rfs4_deleg_state_destroy(rfs4_entry_t);
static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
static uint32_t deleg_hash(void *);
static bool_t deleg_compare(rfs4_entry_t, void *);
static void *deleg_mkkey(rfs4_entry_t);
static uint32_t deleg_state_hash(void *);
static bool_t deleg_state_compare(rfs4_entry_t, void *);
static void *deleg_state_mkkey(rfs4_entry_t);

/*
 * Stable-storage switch: set to 1 by rfs4_ss_init(); rfs4_ss_clid()
 * writes nothing while it is 0.
 */
static int rfs4_ss_enabled = 0;
410
411 void
rfs4_ss_pnfree(rfs4_ss_pn_t * ss_pn)412 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
413 {
414 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
415 }
416
417 static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char * dir,char * leaf)418 rfs4_ss_pnalloc(char *dir, char *leaf)
419 {
420 rfs4_ss_pn_t *ss_pn;
421 int dir_len, leaf_len;
422
423 /*
424 * validate we have a resonable path
425 * (account for the '/' and trailing null)
426 */
427 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
428 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
429 (dir_len + leaf_len + 2) > MAXPATHLEN) {
430 return (NULL);
431 }
432
433 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
434
435 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
436 /* Handy pointer to just the leaf name */
437 ss_pn->leaf = ss_pn->pn + dir_len + 1;
438 return (ss_pn);
439 }
440
441
442 /*
443 * Move the "leaf" filename from "sdir" directory
444 * to the "ddir" directory. Return the pathname of
445 * the destination unless the rename fails in which
446 * case we need to return the source pathname.
447 */
448 static rfs4_ss_pn_t *
rfs4_ss_movestate(char * sdir,char * ddir,char * leaf)449 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
450 {
451 rfs4_ss_pn_t *src, *dst;
452
453 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
454 return (NULL);
455
456 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
457 rfs4_ss_pnfree(src);
458 return (NULL);
459 }
460
461 /*
462 * If the rename fails we shall return the src
463 * pathname and free the dst. Otherwise we need
464 * to free the src and return the dst pathanme.
465 */
466 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
467 rfs4_ss_pnfree(dst);
468 return (src);
469 }
470 rfs4_ss_pnfree(src);
471 return (dst);
472 }
473
474
475 static rfs4_oldstate_t *
rfs4_ss_getstate(vnode_t * dvp,rfs4_ss_pn_t * ss_pn)476 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
477 {
478 struct uio uio;
479 struct iovec iov[3];
480
481 rfs4_oldstate_t *cl_ss = NULL;
482 vnode_t *vp;
483 vattr_t va;
484 uint_t id_len;
485 int err, kill_file, file_vers;
486
487 if (ss_pn == NULL)
488 return (NULL);
489
490 /*
491 * open the state file.
492 */
493 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
494 return (NULL);
495 }
496
497 if (vp->v_type != VREG) {
498 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
499 VN_RELE(vp);
500 return (NULL);
501 }
502
503 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
504 if (err) {
505 /*
506 * We don't have read access? better get the heck out.
507 */
508 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
509 VN_RELE(vp);
510 return (NULL);
511 }
512
513 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
514 /*
515 * get the file size to do some basic validation
516 */
517 va.va_mask = AT_SIZE;
518 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
519
520 kill_file = (va.va_size == 0 || va.va_size <
521 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
522
523 if (err || kill_file) {
524 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
525 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
526 VN_RELE(vp);
527 if (kill_file) {
528 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
529 }
530 return (NULL);
531 }
532
533 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
534
535 /*
536 * build iovecs to read in the file_version, verifier and id_len
537 */
538 iov[0].iov_base = (caddr_t)&file_vers;
539 iov[0].iov_len = sizeof (int);
540 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
541 iov[1].iov_len = NFS4_VERIFIER_SIZE;
542 iov[2].iov_base = (caddr_t)&id_len;
543 iov[2].iov_len = sizeof (uint_t);
544
545 uio.uio_iov = iov;
546 uio.uio_iovcnt = 3;
547 uio.uio_segflg = UIO_SYSSPACE;
548 uio.uio_loffset = 0;
549 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
550
551 err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
552 if (err != 0) {
553 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
554 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
555 VN_RELE(vp);
556 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
557 return (NULL);
558 }
559
560 /*
561 * if the file_version doesn't match or if the
562 * id_len is zero or the combination of the verifier,
563 * id_len and id_val is bigger than the file we have
564 * a problem. If so ditch the file.
565 */
566 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
567 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
568
569 if (err || kill_file) {
570 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
571 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
572 VN_RELE(vp);
573 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
574 if (kill_file) {
575 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
576 }
577 return (NULL);
578 }
579
580 /*
581 * now get the client id value
582 */
583 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
584 iov[0].iov_base = cl_ss->cl_id4.id_val;
585 iov[0].iov_len = id_len;
586
587 uio.uio_iov = iov;
588 uio.uio_iovcnt = 1;
589 uio.uio_segflg = UIO_SYSSPACE;
590 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
591
592 err = VOP_READ(vp, &uio, FREAD, CRED(), NULL);
593 if (err != 0) {
594 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
595 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
596 VN_RELE(vp);
597 kmem_free(cl_ss->cl_id4.id_val, id_len);
598 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
599 return (NULL);
600 }
601
602 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
603 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
604 VN_RELE(vp);
605 return (cl_ss);
606 }
607
/*
 * Step to the directory entry that follows dp in a readdir buffer by
 * advancing d_reclen bytes.  Any earlier definition of nextdp is
 * discarded first.
 */
#ifdef nextdp
#undef nextdp
#endif
#define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
612
613 /*
614 * Check whether list already contains the client
615 * This protects against counting the same client twice.
616 */
617 static bool_t
rfs4_ss_has_client(rfs4_oldstate_t * head,nfs_client_id4 * client)618 rfs4_ss_has_client(rfs4_oldstate_t *head, nfs_client_id4 *client)
619 {
620 rfs4_oldstate_t *p;
621
622 for (p = head->next; p != head; p = p->next) {
623 nfs_client_id4 *m = &p->cl_id4;
624
625 if (m->id_len != client->id_len)
626 continue;
627
628 if (bcmp(m->id_val, client->id_val, client->id_len) == 0)
629 continue;
630
631 /* client ids match */
632 return (TRUE);
633 }
634
635 return (FALSE);
636 }
637
638 /*
639 * Add entries from statedir to supplied oldstate list.
640 * Optionally, move all entries from statedir -> destdir.
641 */
642 static void
rfs4_ss_oldstate(rfs4_oldstate_t * oldstate,char * statedir,char * destdir)643 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
644 {
645 rfs4_ss_pn_t *ss_pn;
646 rfs4_oldstate_t *cl_ss = NULL;
647 char *dirt = NULL;
648 int err, dir_eof = 0, size = 0;
649 vnode_t *dvp;
650 struct iovec iov;
651 struct uio uio;
652 struct dirent64 *dep;
653 offset_t dirchunk_offset = 0;
654 unsigned int nclients = 0;
655
656 /*
657 * open the state directory
658 */
659 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
660 return;
661
662 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
663 goto out;
664
665 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
666
667 /*
668 * Get and process the directory entries
669 */
670 while (!dir_eof) {
671 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
672 iov.iov_base = dirt;
673 iov.iov_len = RFS4_SS_DIRSIZE;
674 uio.uio_iov = &iov;
675 uio.uio_iovcnt = 1;
676 uio.uio_segflg = UIO_SYSSPACE;
677 uio.uio_loffset = dirchunk_offset;
678 uio.uio_resid = RFS4_SS_DIRSIZE;
679
680 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
681 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
682 if (err)
683 goto out;
684
685 size = RFS4_SS_DIRSIZE - uio.uio_resid;
686
687 /*
688 * Process all the directory entries in this
689 * readdir chunk
690 */
691 for (dep = (struct dirent64 *)dirt; size > 0;
692 dep = nextdp(dep)) {
693
694 size -= dep->d_reclen;
695 dirchunk_offset = dep->d_off;
696
697 /*
698 * Skip '.' and '..'
699 */
700 if (NFS_IS_DOTNAME(dep->d_name))
701 continue;
702
703 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
704 if (ss_pn == NULL)
705 continue;
706
707 cl_ss = rfs4_ss_getstate(dvp, ss_pn);
708 if (cl_ss != NULL) {
709 if (destdir != NULL) {
710 rfs4_ss_pnfree(ss_pn);
711 cl_ss->ss_pn = rfs4_ss_movestate(
712 statedir, destdir, dep->d_name);
713 } else {
714 cl_ss->ss_pn = ss_pn;
715 }
716
717 if (!rfs4_ss_has_client(oldstate,
718 &cl_ss->cl_id4))
719 nclients++;
720
721 insque(cl_ss, oldstate);
722 } else {
723 rfs4_ss_pnfree(ss_pn);
724 }
725 }
726 }
727
728 out:
729 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
730 VN_RELE(dvp);
731 if (dirt)
732 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
733
734 if (nclients > 0) {
735 nfs4_srv_t *nsrv4 = nfs4_get_srv();
736
737 atomic_add_32(&(nsrv4->nfs4_cur_servinst->nreclaim), nclients);
738 }
739 }
740
741 static void
rfs4_ss_init(nfs4_srv_t * nsrv4)742 rfs4_ss_init(nfs4_srv_t *nsrv4)
743 {
744 int npaths = 1;
745 char *default_dss_path = NFS4_DSS_VAR_DIR;
746
747 /* read the default stable storage state */
748 rfs4_dss_readstate(nsrv4, npaths, &default_dss_path);
749
750 rfs4_ss_enabled = 1;
751 }
752
753 static void
rfs4_ss_fini(nfs4_srv_t * nsrv4)754 rfs4_ss_fini(nfs4_srv_t *nsrv4)
755 {
756 rfs4_servinst_t *sip;
757
758 mutex_enter(&nsrv4->servinst_lock);
759 sip = nsrv4->nfs4_cur_servinst;
760 while (sip != NULL) {
761 rfs4_dss_clear_oldstate(sip);
762 sip = sip->next;
763 }
764 mutex_exit(&nsrv4->servinst_lock);
765 }
766
767 /*
768 * Remove all oldstate files referenced by this servinst.
769 */
770 static void
rfs4_dss_clear_oldstate(rfs4_servinst_t * sip)771 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
772 {
773 rfs4_oldstate_t *os_head, *osp;
774
775 rw_enter(&sip->oldstate_lock, RW_WRITER);
776 os_head = sip->oldstate;
777
778 if (os_head == NULL) {
779 rw_exit(&sip->oldstate_lock);
780 return;
781 }
782
783 /* skip dummy entry */
784 osp = os_head->next;
785 while (osp != os_head) {
786 char *leaf = osp->ss_pn->leaf;
787 rfs4_oldstate_t *os_next;
788
789 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
790
791 if (osp->cl_id4.id_val)
792 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
793 rfs4_ss_pnfree(osp->ss_pn);
794
795 os_next = osp->next;
796 remque(osp);
797 kmem_free(osp, sizeof (rfs4_oldstate_t));
798 osp = os_next;
799 }
800
801 rw_exit(&sip->oldstate_lock);
802 }
803
804 /*
805 * Form the state and oldstate paths, and read in the stable storage files.
806 */
807 void
rfs4_dss_readstate(nfs4_srv_t * nsrv4,int npaths,char ** paths)808 rfs4_dss_readstate(nfs4_srv_t *nsrv4, int npaths, char **paths)
809 {
810 int i;
811 char *state, *oldstate;
812
813 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
814 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
815
816 for (i = 0; i < npaths; i++) {
817 char *path = paths[i];
818
819 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
820 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
821
822 /*
823 * Populate the current server instance's oldstate list.
824 *
825 * 1. Read stable storage data from old state directory,
826 * leaving its contents alone.
827 *
828 * 2. Read stable storage data from state directory,
829 * and move the latter's contents to old state
830 * directory.
831 */
832 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
833 oldstate, NULL);
834 rfs4_ss_oldstate(nsrv4->nfs4_cur_servinst->oldstate,
835 state, oldstate);
836 }
837
838 kmem_free(state, MAXPATHLEN);
839 kmem_free(oldstate, MAXPATHLEN);
840 }
841
842
843 /*
844 * Check if we are still in grace and if the client can be
845 * granted permission to perform reclaims.
846 */
847 void
rfs4_ss_chkclid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)848 rfs4_ss_chkclid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
849 {
850 rfs4_servinst_t *sip;
851
852 /*
853 * It should be sufficient to check the oldstate data for just
854 * this client's instance. However, since our per-instance
855 * client grouping is solely temporal, HA-NFSv4 RG failover
856 * might result in clients of the same RG being partitioned into
857 * separate instances.
858 *
859 * Until the client grouping is improved, we must check the
860 * oldstate data for all instances with an active grace period.
861 *
862 * This also serves as the mechanism to remove stale oldstate data.
863 * The first time we check an instance after its grace period has
864 * expired, the oldstate data should be cleared.
865 *
866 * Start at the current instance, and walk the list backwards
867 * to the first.
868 */
869 mutex_enter(&nsrv4->servinst_lock);
870 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
871 rfs4_ss_chkclid_sip(cp, sip);
872
873 /* if the above check found this client, we're done */
874 if (cp->rc_can_reclaim)
875 break;
876 }
877 mutex_exit(&nsrv4->servinst_lock);
878 }
879
880 static void
rfs4_ss_chkclid_sip(rfs4_client_t * cp,rfs4_servinst_t * sip)881 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
882 {
883 rfs4_oldstate_t *osp, *os_head;
884
885 /* short circuit everything if this server instance has no oldstate */
886 rw_enter(&sip->oldstate_lock, RW_READER);
887 os_head = sip->oldstate;
888 rw_exit(&sip->oldstate_lock);
889 if (os_head == NULL)
890 return;
891
892 /*
893 * If this server instance is no longer in a grace period then
894 * the client won't be able to reclaim. No further need for this
895 * instance's oldstate data, so it can be cleared.
896 */
897 if (!rfs4_servinst_in_grace(sip))
898 return;
899
900 /* this instance is still in grace; search for the clientid */
901
902 rw_enter(&sip->oldstate_lock, RW_READER);
903
904 os_head = sip->oldstate;
905 /* skip dummy entry */
906 osp = os_head->next;
907 while (osp != os_head) {
908 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
909 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
910 osp->cl_id4.id_len) == 0) {
911 cp->rc_can_reclaim = 1;
912 break;
913 }
914 }
915 osp = osp->next;
916 }
917
918 rw_exit(&sip->oldstate_lock);
919 }
920
921 /*
922 * Place client information into stable storage: 1/3.
923 * First, generate the leaf filename, from the client's IP address and
924 * the server-generated short-hand clientid.
925 */
926 void
rfs4_ss_clid(nfs4_srv_t * nsrv4,rfs4_client_t * cp)927 rfs4_ss_clid(nfs4_srv_t *nsrv4, rfs4_client_t *cp)
928 {
929 const char *kinet_ntop6(uchar_t *, char *, size_t);
930 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
931 struct sockaddr *ca;
932 uchar_t *b;
933
934 if (rfs4_ss_enabled == 0) {
935 return;
936 }
937
938 buf[0] = 0;
939
940 ca = (struct sockaddr *)&cp->rc_addr;
941
942 /*
943 * Convert the caller's IP address to a dotted string
944 */
945 if (ca->sa_family == AF_INET) {
946 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
947 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
948 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
949 } else if (ca->sa_family == AF_INET6) {
950 struct sockaddr_in6 *sin6;
951
952 sin6 = (struct sockaddr_in6 *)ca;
953 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
954 buf, INET6_ADDRSTRLEN);
955 }
956
957 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
958 (longlong_t)cp->rc_clientid);
959 rfs4_ss_clid_write(nsrv4, cp, leaf);
960 }
961
962 /*
963 * Place client information into stable storage: 2/3.
964 * DSS: distributed stable storage: the file may need to be written to
965 * multiple directories.
966 */
967 static void
rfs4_ss_clid_write(nfs4_srv_t * nsrv4,rfs4_client_t * cp,char * leaf)968 rfs4_ss_clid_write(nfs4_srv_t *nsrv4, rfs4_client_t *cp, char *leaf)
969 {
970 rfs4_servinst_t *sip;
971
972 /*
973 * It should be sufficient to write the leaf file to (all) DSS paths
974 * associated with just this client's instance. However, since our
975 * per-instance client grouping is solely temporal, HA-NFSv4 RG
976 * failover might result in us losing DSS data.
977 *
978 * Until the client grouping is improved, we must write the DSS data
979 * to all instances' paths. Start at the current instance, and
980 * walk the list backwards to the first.
981 */
982 mutex_enter(&nsrv4->servinst_lock);
983 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
984 int i, npaths = sip->dss_npaths;
985
986 /* write the leaf file to all DSS paths */
987 for (i = 0; i < npaths; i++) {
988 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
989
990 /* HA-NFSv4 path might have been failed-away from us */
991 if (dss_path == NULL)
992 continue;
993
994 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
995 }
996 }
997 mutex_exit(&nsrv4->servinst_lock);
998 }
999
1000 /*
1001 * Place client information into stable storage: 3/3.
1002 * Write the stable storage data to the requested file.
1003 */
1004 static void
rfs4_ss_clid_write_one(rfs4_client_t * cp,char * dss_path,char * leaf)1005 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
1006 {
1007 int ioflag;
1008 int file_vers = NFS4_SS_VERSION;
1009 size_t dirlen;
1010 struct uio uio;
1011 struct iovec iov[4];
1012 char *dir;
1013 rfs4_ss_pn_t *ss_pn;
1014 vnode_t *vp;
1015 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
1016
1017 /* allow 2 extra bytes for '/' & NUL */
1018 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
1019 dir = kmem_alloc(dirlen, KM_SLEEP);
1020 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
1021
1022 ss_pn = rfs4_ss_pnalloc(dir, leaf);
1023 /* rfs4_ss_pnalloc takes its own copy */
1024 kmem_free(dir, dirlen);
1025 if (ss_pn == NULL)
1026 return;
1027
1028 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
1029 CRCREAT, 0)) {
1030 rfs4_ss_pnfree(ss_pn);
1031 return;
1032 }
1033
1034 /*
1035 * We need to record leaf - i.e. the filename - so that we know
1036 * what to remove, in the future. However, the dir part of cp->ss_pn
1037 * should never be referenced directly, since it's potentially only
1038 * one of several paths with this leaf in it.
1039 */
1040 if (cp->rc_ss_pn != NULL) {
1041 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
1042 /* we've already recorded *this* leaf */
1043 rfs4_ss_pnfree(ss_pn);
1044 } else {
1045 /* replace with this leaf */
1046 rfs4_ss_pnfree(cp->rc_ss_pn);
1047 cp->rc_ss_pn = ss_pn;
1048 }
1049 } else {
1050 cp->rc_ss_pn = ss_pn;
1051 }
1052
1053 /*
1054 * Build a scatter list that points to the nfs_client_id4
1055 */
1056 iov[0].iov_base = (caddr_t)&file_vers;
1057 iov[0].iov_len = sizeof (int);
1058 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1059 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1060 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1061 iov[2].iov_len = sizeof (uint_t);
1062 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1063 iov[3].iov_len = cl_id4->id_len;
1064
1065 uio.uio_iov = iov;
1066 uio.uio_iovcnt = 4;
1067 uio.uio_loffset = 0;
1068 uio.uio_segflg = UIO_SYSSPACE;
1069 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1070 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1071 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1072
1073 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1074 uio.uio_extflg = UIO_COPY_DEFAULT;
1075
1076 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1077 /* write the full client id to the file. */
1078 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1079 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1080
1081 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1082 VN_RELE(vp);
1083 }
1084
1085 /*
1086 * DSS: distributed stable storage.
1087 * Unpack the list of paths passed by nfsd.
1088 * Use nvlist_alloc(9F) to manage the data.
1089 * The caller is responsible for allocating and freeing the buffer.
1090 */
1091 int
rfs4_dss_setpaths(char * buf,size_t buflen)1092 rfs4_dss_setpaths(char *buf, size_t buflen)
1093 {
1094 int error;
1095
1096 /*
1097 * If this is a "warm start", i.e. we previously had DSS paths,
1098 * preserve the old paths.
1099 */
1100 if (rfs4_dss_paths != NULL) {
1101 /*
1102 * Before we lose the ptr, destroy the nvlist and pathnames
1103 * array from the warm start before this one.
1104 */
1105 nvlist_free(rfs4_dss_oldpaths);
1106 rfs4_dss_oldpaths = rfs4_dss_paths;
1107 }
1108
1109 /* unpack the buffer into a searchable nvlist */
1110 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1111 if (error)
1112 return (error);
1113
1114 /*
1115 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1116 * in the list, and record its location.
1117 */
1118 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1119 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1120 return (error);
1121 }
1122
1123 /*
1124 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1125 * to find and mark the client for forced expire.
1126 */
1127 static void
rfs4_client_scrub(rfs4_entry_t ent,void * arg)1128 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1129 {
1130 rfs4_client_t *cp = (rfs4_client_t *)ent;
1131 struct nfs4clrst_args *clr = arg;
1132 struct sockaddr_in6 *ent_sin6;
1133 struct in6_addr clr_in6;
1134 struct sockaddr_in *ent_sin;
1135 struct in_addr clr_in;
1136
1137 if (clr->addr_type != cp->rc_addr.ss_family) {
1138 return;
1139 }
1140
1141 switch (clr->addr_type) {
1142
1143 case AF_INET6:
1144 /* copyin the address from user space */
1145 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1146 break;
1147 }
1148
1149 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1150
1151 /*
1152 * now compare, and if equivalent mark entry
1153 * for forced expiration
1154 */
1155 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1156 cp->rc_forced_expire = 1;
1157 }
1158 break;
1159
1160 case AF_INET:
1161 /* copyin the address from user space */
1162 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1163 break;
1164 }
1165
1166 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1167
1168 /*
1169 * now compare, and if equivalent mark entry
1170 * for forced expiration
1171 */
1172 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1173 cp->rc_forced_expire = 1;
1174 }
1175 break;
1176
1177 default:
1178 /* force this assert to fail */
1179 ASSERT(clr->addr_type != clr->addr_type);
1180 }
1181 }
1182
1183 /*
1184 * This is called from nfssys() in order to clear server state
1185 * for the specified client IP Address.
1186 */
1187 int
rfs4_clear_client_state(struct nfs4clrst_args * clr)1188 rfs4_clear_client_state(struct nfs4clrst_args *clr)
1189 {
1190 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1191 int rc;
1192
1193 /* Once nfssrv is loaded, every zone should have one of these. */
1194 VERIFY(nsrv4 != NULL);
1195
1196 mutex_enter(&nsrv4->state_lock);
1197 /*
1198 * But only after NFS service is running is the nfs4_server_state
1199 * around. It's dirty (and needs the state_lock held), but all of the
1200 * databases live deep in the nfs4_server_state, so it's the only thing
1201 * to legitimately check prior to using anything. The pointers
1202 * themselves may be stale.
1203 */
1204 if (nsrv4->nfs4_server_state != NULL) {
1205 VERIFY(nsrv4->rfs4_client_tab != NULL);
1206 rfs4_dbe_walk(nsrv4->rfs4_client_tab, rfs4_client_scrub, clr);
1207 rc = 0;
1208 } else {
1209 rc = ENXIO;
1210 }
1211 mutex_exit(&nsrv4->state_lock);
1212 return (rc);
1213 }
1214
1215 /*
1216 * Used to initialize the NFSv4 server's state or database. All of
1217 * the tables are created and timers are set.
1218 */
1219 void
rfs4_state_g_init(void)1220 rfs4_state_g_init(void)
1221 {
1222 extern boolean_t rfs4_cpr_callb(void *, int);
1223 /*
1224 * Add a CPR callback so that we can update client
1225 * access times to extend the lease after a suspend
1226 * and resume (using the same class as rpcmod/connmgr)
1227 */
1228 cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");
1229
1230 /*
1231 * NFSv4 server state databases
1232 *
1233 * Initialized when the module is loaded and used by NFSv4 state
1234 * tables. These kmem_cache free pools are used globally, the NFSv4
1235 * state tables which make use of these kmem_cache free pools are per
1236 * zone.
1237 *
1238 * initialize the global kmem_cache free pools which will be used by
1239 * the NFSv4 state tables.
1240 */
1241 rfs4_client_mem_cache = nfs4_init_mem_cache("Client_entry_cache",
1242 2, sizeof (rfs4_client_t), 0);
1243 rfs4_clntIP_mem_cache = nfs4_init_mem_cache("ClntIP_entry_cache",
1244 1, sizeof (rfs4_clntip_t), 1);
1245 rfs4_openown_mem_cache = nfs4_init_mem_cache("OpenOwner_entry_cache",
1246 1, sizeof (rfs4_openowner_t), 2);
1247 rfs4_openstID_mem_cache = nfs4_init_mem_cache("OpenStateID_entry_cache",
1248 3, sizeof (rfs4_state_t), 3);
1249 rfs4_lockstID_mem_cache = nfs4_init_mem_cache("LockStateID_entry_cache",
1250 3, sizeof (rfs4_lo_state_t), 4);
1251 rfs4_lockown_mem_cache = nfs4_init_mem_cache("Lockowner_entry_cache",
1252 2, sizeof (rfs4_lockowner_t), 5);
1253 rfs4_file_mem_cache = nfs4_init_mem_cache("File_entry_cache",
1254 1, sizeof (rfs4_file_t), 6);
1255 rfs4_delegstID_mem_cache =
1256 nfs4_init_mem_cache("DelegStateID_entry_cache", 2,
1257 sizeof (rfs4_deleg_state_t), 7);
1258 rfs4_session_mem_cache = nfs4_init_mem_cache("Session_entry_cache",
1259 1, sizeof (rfs4_session_t), 8);
1260 }
1261
1262
1263 /*
1264 * Used at server shutdown to cleanup all of the NFSv4 server's structures
1265 * and other state.
1266 */
1267 void
rfs4_state_g_fini(void)1268 rfs4_state_g_fini(void)
1269 {
1270 int i;
1271 /*
1272 * Cleanup the CPR callback.
1273 */
1274 if (cpr_id)
1275 (void) callb_delete(cpr_id);
1276
1277 /* free the NFSv4 state databases */
1278 for (i = 0; i < RFS4_DB_MEM_CACHE_NUM; i++) {
1279 kmem_cache_destroy(rfs4_db_mem_cache_table[i].r_db_mem_cache);
1280 rfs4_db_mem_cache_table[i].r_db_mem_cache = NULL;
1281 }
1282
1283 rfs4_client_mem_cache = NULL;
1284 rfs4_clntIP_mem_cache = NULL;
1285 rfs4_openown_mem_cache = NULL;
1286 rfs4_openstID_mem_cache = NULL;
1287 rfs4_lockstID_mem_cache = NULL;
1288 rfs4_lockown_mem_cache = NULL;
1289 rfs4_file_mem_cache = NULL;
1290 rfs4_delegstID_mem_cache = NULL;
1291 rfs4_session_mem_cache = NULL;
1292
1293 /* DSS: distributed stable storage */
1294 nvlist_free(rfs4_dss_oldpaths);
1295 nvlist_free(rfs4_dss_paths);
1296 rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
1297 }
1298
/*
 * Used to initialize the per zone NFSv4 server's state.
 *
 * In order, this:
 *   - clears the zone's list of served DSS paths;
 *   - advances rfs4_start_time;
 *   - creates a server instance (grace period not started) over the
 *     default stable storage directory and, in the global zone, any DSS
 *     paths previously recorded in rfs4_dss_newpaths;
 *   - under state_lock, and only if nfs4_server_state has not already
 *     been created, creates the state database together with all of its
 *     tables and indices;
 *   - initializes stable storage via rfs4_ss_init().
 *
 * Note that when the database already exists the function returns
 * before reaching rfs4_ss_init().
 */
void
rfs4_state_zone_init(nfs4_srv_t *nsrv4)
{
	time_t start_time;
	int start_grace;
	char *dss_path = NFS4_DSS_VAR_DIR;

	/* DSS: distributed stable storage: initialise served paths list */
	nsrv4->dss_pathlist = NULL;

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (nsrv4->rfs4_start_time < start_time)
		nsrv4->rfs4_start_time = start_time;
	else
		nsrv4->rfs4_start_time++;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	if (curzone == global_zone && rfs4_dss_numnewpaths > 0) {
		int i;
		char **dss_allpaths = NULL;
		dss_allpaths = kmem_alloc(sizeof (char *) *
		    (rfs4_dss_numnewpaths + 1), KM_SLEEP);
		/*
		 * Add the default path into the list of paths for saving
		 * state information.  It occupies slot 0; the DSS paths
		 * follow it.
		 */
		dss_allpaths[0] = dss_path;
		for (i = 0; i < rfs4_dss_numnewpaths; i++) {
			dss_allpaths[i + 1] = rfs4_dss_newpaths[i];
		}
		rfs4_servinst_create(nsrv4, start_grace,
		    (rfs4_dss_numnewpaths + 1), dss_allpaths);
		/* only the temporary pointer array is freed here */
		kmem_free(dss_allpaths,
		    (sizeof (char *) * (rfs4_dss_numnewpaths + 1)));
	} else {
		rfs4_servinst_create(nsrv4, start_grace, 1, &dss_path);
	}

	/* reset the "first NFSv4 request" status */
	nsrv4->seen_first_compound = 0;

	mutex_enter(&nsrv4->state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (nsrv4->nfs4_server_state != NULL) {
		mutex_exit(&nsrv4->state_lock);
		return;
	}

	rw_init(&nsrv4->rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set the various cache timers for table creation.  A value of
	 * zero means "not configured", in which case the compiled-in
	 * default is used.  Most of these are multiplied by
	 * rfs4_lease_time below, just before the matching table is
	 * created.
	 */
	if (nsrv4->rfs4_client_cache_time == 0)
		nsrv4->rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (nsrv4->rfs4_openowner_cache_time == 0)
		nsrv4->rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (nsrv4->rfs4_state_cache_time == 0)
		nsrv4->rfs4_state_cache_time = STATE_CACHE_TIME;
	if (nsrv4->rfs4_lo_state_cache_time == 0)
		nsrv4->rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (nsrv4->rfs4_lockowner_cache_time == 0)
		nsrv4->rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (nsrv4->rfs4_file_cache_time == 0)
		nsrv4->rfs4_file_cache_time = FILE_CACHE_TIME;
	if (nsrv4->rfs4_deleg_state_cache_time == 0)
		nsrv4->rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	nsrv4->nfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/* Now create the individual tables */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	nsrv4->rfs4_client_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_client_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Client",
	    nsrv4->rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	nsrv4->rfs4_nfsclnt_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	nsrv4->rfs4_clientid_idx = rfs4_index_create(nsrv4->rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/* Client IP table: fixed cache time, not scaled by the lease */
	nsrv4->rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	nsrv4->rfs4_clntip_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "ClntIP",
	    nsrv4->rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_clntip_idx = rfs4_index_create(nsrv4->rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table */
	nsrv4->rfs4_openowner_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_openowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenOwner",
	    nsrv4->rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_openowner_idx = rfs4_index_create(nsrv4->rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state table: three indices */
	nsrv4->rfs4_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "OpenStateID",
	    nsrv4->rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_state_owner_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	nsrv4->rfs4_state_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	nsrv4->rfs4_state_file_idx = rfs4_index_create(nsrv4->rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state table */
	nsrv4->rfs4_lo_state_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_lo_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "LockStateID",
	    nsrv4->rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	/* CSTYLED */
	nsrv4->rfs4_lo_state_owner_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	nsrv4->rfs4_lo_state_idx = rfs4_index_create(nsrv4->rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table */
	nsrv4->rfs4_lockowner_cache_time *= rfs4_lease_time;

	nsrv4->rfs4_lockowner_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "Lockowner",
	    nsrv4->rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	nsrv4->rfs4_lockowner_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_lockowner_pid_idx = rfs4_index_create(nsrv4->rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/*
	 * File table.  Unlike the other tables it is created with no
	 * expiry callback (NULL) and with -1 as the final argument.
	 */
	nsrv4->rfs4_file_cache_time *= rfs4_lease_time;
	nsrv4->rfs4_file_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "File",
	    nsrv4->rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	nsrv4->rfs4_file_idx = rfs4_index_create(nsrv4->rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table */
	nsrv4->rfs4_deleg_state_cache_time *= rfs4_lease_time;
	/* CSTYLED */
	nsrv4->rfs4_deleg_state_tab = rfs4_table_create(nsrv4->nfs4_server_state,
	    "DelegStateID",
	    nsrv4->rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	nsrv4->rfs4_deleg_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	/* CSTYLED */
	nsrv4->rfs4_deleg_state_idx = rfs4_index_create(nsrv4->rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	/* called with state_lock still held */
	rfs4x_state_init_locked(nsrv4);

	mutex_exit(&nsrv4->state_lock);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init(nsrv4);
}
1566
1567 /*
1568 * Used at server shutdown to cleanup all of NFSv4 server's zone structures
1569 * and state.
1570 */
1571 void
rfs4_state_zone_fini(void)1572 rfs4_state_zone_fini(void)
1573 {
1574 rfs4_database_t *dbp;
1575 nfs4_srv_t *nsrv4;
1576 nsrv4 = nfs4_get_srv();
1577
1578 rfs4_set_deleg_policy(nsrv4, SRV_NEVER_DELEGATE);
1579
1580 /*
1581 * Clean up any dangling stable storage structures BEFORE calling
1582 * rfs4_servinst_destroy_all() so there are no dangling structures
1583 * (i.e. the srvinsts are all cleared of danglers BEFORE they get
1584 * freed).
1585 */
1586 rfs4_ss_fini(nsrv4);
1587
1588 mutex_enter(&nsrv4->state_lock);
1589
1590 if (nsrv4->nfs4_server_state == NULL) {
1591 mutex_exit(&nsrv4->state_lock);
1592 return;
1593 }
1594
1595 rfs4x_state_fini(nsrv4);
1596
1597 /* destroy server instances and current instance ptr */
1598 rfs4_servinst_destroy_all(nsrv4);
1599
1600 /* reset the "first NFSv4 request" status */
1601 nsrv4->seen_first_compound = 0;
1602
1603 dbp = nsrv4->nfs4_server_state;
1604 nsrv4->nfs4_server_state = NULL;
1605
1606 rw_destroy(&nsrv4->rfs4_findclient_lock);
1607
1608 /* First stop all of the reaper threads in the database */
1609 rfs4_database_shutdown(dbp);
1610
1611 /*
1612 * WARNING: There may be consumers of the rfs4 database still
1613 * active as we destroy these. IF that's the case, consider putting
1614 * some of their _zone_fini()-like functions into the zsd key as
1615 * ~~SHUTDOWN~~ functions instead of ~~DESTROY~~ functions. We can
1616 * maintain some ordering guarantees better that way.
1617 */
1618 /* Now destroy/release the database tables */
1619 rfs4_database_destroy(dbp);
1620
1621 /* Reset the cache timers for next time */
1622 nsrv4->rfs4_client_cache_time = 0;
1623 nsrv4->rfs4_openowner_cache_time = 0;
1624 nsrv4->rfs4_state_cache_time = 0;
1625 nsrv4->rfs4_lo_state_cache_time = 0;
1626 nsrv4->rfs4_lockowner_cache_time = 0;
1627 nsrv4->rfs4_file_cache_time = 0;
1628 nsrv4->rfs4_deleg_state_cache_time = 0;
1629
1630 mutex_exit(&nsrv4->state_lock);
1631 }
1632
/*
 * Overlay used to build and pick apart a clientid4.
 * rfs4_client_create() fills in start_time from the zone's
 * rfs4_start_time and c_id from the id of the client's database entry;
 * clientid_hash() hashes on c_id alone.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;

/*
 * Forward declarations.  In this part of the file foreign_clientid()
 * and embed_nodeid() are only called when cluster_bootflags has
 * CLUSTER_BOOTED set.
 */
static int foreign_stateid(stateid_t *id);
static int foreign_clientid(cid *cidp);
static void embed_nodeid(cid *cidp);

/*
 * Overlay used to build the SETCLIENTID_CONFIRM verifier.
 * rfs4_client_create() copies c_id from the client's cid and zeroes
 * gen_num; rfs4_client_scv_next() increments gen_num.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4 confirm_verf;
} scid_confirm_verf;
1652
1653 static uint32_t
clientid_hash(void * key)1654 clientid_hash(void *key)
1655 {
1656 cid *idp = key;
1657
1658 return (idp->impl_id.c_id);
1659 }
1660
1661 static bool_t
clientid_compare(rfs4_entry_t entry,void * key)1662 clientid_compare(rfs4_entry_t entry, void *key)
1663 {
1664 rfs4_client_t *cp = (rfs4_client_t *)entry;
1665 clientid4 *idp = key;
1666
1667 return (*idp == cp->rc_clientid);
1668 }
1669
1670 static void *
clientid_mkkey(rfs4_entry_t entry)1671 clientid_mkkey(rfs4_entry_t entry)
1672 {
1673 rfs4_client_t *cp = (rfs4_client_t *)entry;
1674
1675 return (&cp->rc_clientid);
1676 }
1677
1678 static uint32_t
nfsclnt_hash(void * key)1679 nfsclnt_hash(void *key)
1680 {
1681 nfs_client_id4 *client = key;
1682 int i;
1683 uint32_t hash = 0;
1684
1685 for (i = 0; i < client->id_len; i++) {
1686 hash <<= 1;
1687 hash += (uint_t)client->id_val[i];
1688 }
1689 return (hash);
1690 }
1691
1692
1693 static bool_t
nfsclnt_compare(rfs4_entry_t entry,void * key)1694 nfsclnt_compare(rfs4_entry_t entry, void *key)
1695 {
1696 rfs4_client_t *cp = (rfs4_client_t *)entry;
1697 nfs_client_id4 *nfs_client = key;
1698
1699 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1700 return (FALSE);
1701
1702 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1703 nfs_client->id_len) == 0);
1704 }
1705
1706 static void *
nfsclnt_mkkey(rfs4_entry_t entry)1707 nfsclnt_mkkey(rfs4_entry_t entry)
1708 {
1709 rfs4_client_t *cp = (rfs4_client_t *)entry;
1710
1711 return (&cp->rc_nfs_client);
1712 }
1713
1714 static bool_t
rfs4_client_expiry(rfs4_entry_t u_entry)1715 rfs4_client_expiry(rfs4_entry_t u_entry)
1716 {
1717 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1718 bool_t cp_expired;
1719
1720 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1721 cp->rc_ss_remove = 1;
1722 return (TRUE);
1723 }
1724 /*
1725 * If the sysadmin has used clear_locks for this
1726 * entry then forced_expire will be set and we
1727 * want this entry to be reaped. Or the entry
1728 * has exceeded its lease period.
1729 */
1730 cp_expired = (cp->rc_forced_expire ||
1731 (gethrestime_sec() - cp->rc_last_access
1732 > rfs4_lease_time));
1733
1734 if (!cp->rc_ss_remove && cp_expired)
1735 cp->rc_ss_remove = 1;
1736 return (cp_expired);
1737 }
1738
1739 /*
1740 * Remove the leaf file from all distributed stable storage paths.
1741 */
1742 static void
rfs4_dss_remove_cpleaf(rfs4_client_t * cp)1743 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1744 {
1745 nfs4_srv_t *nsrv4;
1746 rfs4_servinst_t *sip;
1747 char *leaf = cp->rc_ss_pn->leaf;
1748
1749 /*
1750 * since the state files are written to all DSS
1751 * paths we must remove this leaf file instance
1752 * from all server instances.
1753 */
1754
1755 nsrv4 = nfs4_get_srv();
1756 mutex_enter(&nsrv4->servinst_lock);
1757 for (sip = nsrv4->nfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1758 /* remove the leaf file associated with this server instance */
1759 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1760 }
1761 mutex_exit(&nsrv4->servinst_lock);
1762 }
1763
1764 static void
rfs4_dss_remove_leaf(rfs4_servinst_t * sip,char * dir_leaf,char * leaf)1765 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1766 {
1767 int i, npaths = sip->dss_npaths;
1768
1769 for (i = 0; i < npaths; i++) {
1770 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1771 char *path, *dir;
1772 size_t pathlen;
1773
1774 /* the HA-NFSv4 path might have been failed-over away from us */
1775 if (dss_path == NULL)
1776 continue;
1777
1778 dir = dss_path->path;
1779
1780 /* allow 3 extra bytes for two '/' & a NUL */
1781 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1782 path = kmem_alloc(pathlen, KM_SLEEP);
1783 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1784
1785 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1786
1787 kmem_free(path, pathlen);
1788 }
1789 }
1790
1791 static void
rfs4_client_destroy(rfs4_entry_t u_entry)1792 rfs4_client_destroy(rfs4_entry_t u_entry)
1793 {
1794 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1795
1796 mutex_destroy(cp->rc_cbinfo.cb_lock);
1797 cv_destroy(cp->rc_cbinfo.cb_cv);
1798 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1799 list_destroy(&cp->rc_openownerlist);
1800
1801 list_destroy(&cp->rc_sessions);
1802
1803 /* free callback info */
1804 rfs4_cbinfo_free(&cp->rc_cbinfo);
1805
1806 if (cp->rc_cp_confirmed)
1807 rfs4_client_rele(cp->rc_cp_confirmed);
1808
1809 if (cp->rc_ss_pn) {
1810 /* check if the stable storage files need to be removed */
1811 if (cp->rc_ss_remove)
1812 rfs4_dss_remove_cpleaf(cp);
1813 rfs4_ss_pnfree(cp->rc_ss_pn);
1814 }
1815
1816 /* Free the client supplied client id */
1817 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1818
1819 if (cp->rc_sysidt != LM_NOSYSID)
1820 lm_free_sysidt(cp->rc_sysidt);
1821
1822 rfs4_free_cred_set(&cp->rc_cr_set);
1823 }
1824
1825 static bool_t
rfs4_client_create(rfs4_entry_t u_entry,void * arg)1826 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1827 {
1828 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1829 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1830 struct sockaddr *ca;
1831 cid *cidp;
1832 scid_confirm_verf *scvp;
1833 nfs4_srv_t *nsrv4;
1834
1835 nsrv4 = nfs4_get_srv();
1836
1837 /* Get a clientid to give to the client */
1838 cidp = (cid *)&cp->rc_clientid;
1839 cidp->impl_id.start_time = nsrv4->rfs4_start_time;
1840 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1841
1842 /* If we are booted as a cluster node, embed our nodeid */
1843 if (cluster_bootflags & CLUSTER_BOOTED)
1844 embed_nodeid(cidp);
1845
1846 /* Allocate and copy client's client id value */
1847 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1848 cp->rc_nfs_client.id_len = client->id_len;
1849 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1850 cp->rc_nfs_client.verifier = client->verifier;
1851
1852 /* Copy client's IP address */
1853 ca = client->cl_addr;
1854 if (ca->sa_family == AF_INET)
1855 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1856 else if (ca->sa_family == AF_INET6)
1857 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1858 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1859
1860 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1861 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1862 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1863 scvp->cv_impl.gen_num = 0;
1864
1865 /* An F_UNLKSYS has been done for this client */
1866 cp->rc_unlksys_completed = FALSE;
1867
1868 /* We need the client to ack us */
1869 cp->rc_need_confirm = TRUE;
1870 cp->rc_cp_confirmed = NULL;
1871 cp->rc_destroying = FALSE;
1872
1873 /* TRUE all the time until the callback path actually fails */
1874 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1875
1876 /* Initialize the access time to now */
1877 cp->rc_last_access = gethrestime_sec();
1878
1879 bzero(&cp->rc_cr_set, sizeof (cred_set_t));
1880
1881 cp->rc_sysidt = LM_NOSYSID;
1882
1883 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1884 offsetof(rfs4_openowner_t, ro_node));
1885
1886 list_create(&cp->rc_sessions, sizeof (rfs4_session_t),
1887 offsetof(rfs4_session_t, sn_node));
1888
1889 /* set up the callback control structure */
1890 cp->rc_cbinfo.cb_state = CB_UNINIT;
1891 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1892 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1893 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1894
1895 /*
1896 * Associate the client_t with the current server instance.
1897 * The hold is solely to satisfy the calling requirement of
1898 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1899 */
1900 rfs4_dbe_hold(cp->rc_dbe);
1901 rfs4_servinst_assign(nsrv4, cp, nsrv4->nfs4_cur_servinst);
1902 rfs4_dbe_rele(cp->rc_dbe);
1903
1904 /*
1905 * NFSv4.1: See rfc8881, Section 18.36.4, eir_sequenceid
1906 * "Before the server replies to that EXCHANGE_ID
1907 * operation, it initializes the client ID slot to be equal to
1908 * eir_sequenceid - 1 (accounting for underflow), and records a
1909 * contrived CREATE_SESSION result with a "cached" result of
1910 * NFS4ERR_SEQ_MISORDERED."
1911 */
1912 cp->rc_contrived.xi_sid = 1;
1913 cp->rc_contrived.cs_status = NFS4ERR_SEQ_MISORDERED;
1914
1915 return (TRUE);
1916 }
1917
1918 /*
1919 * Caller wants to generate/update the setclientid_confirm verifier
1920 * associated with a client. This is done during the SETCLIENTID
1921 * processing.
1922 */
1923 void
rfs4_client_scv_next(rfs4_client_t * cp)1924 rfs4_client_scv_next(rfs4_client_t *cp)
1925 {
1926 scid_confirm_verf *scvp;
1927
1928 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1929 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1930 scvp->cv_impl.gen_num++;
1931 }
1932
/*
 * Release the database entry backing this client (rfs4_dbe_rele() on
 * cp->rc_dbe).
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1938
1939 rfs4_client_t *
rfs4_findclient(nfs_client_id4 * client,bool_t * create,rfs4_client_t * oldcp)1940 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1941 {
1942 rfs4_client_t *cp;
1943 nfs4_srv_t *nsrv4;
1944 nsrv4 = nfs4_get_srv();
1945
1946
1947 if (oldcp) {
1948 rw_enter(&nsrv4->rfs4_findclient_lock, RW_WRITER);
1949 rfs4_dbe_hide(oldcp->rc_dbe);
1950 } else {
1951 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1952 }
1953
1954 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_nfsclnt_idx, client,
1955 create, (void *)client, RFS4_DBS_VALID);
1956
1957 if (oldcp)
1958 rfs4_dbe_unhide(oldcp->rc_dbe);
1959
1960 rw_exit(&nsrv4->rfs4_findclient_lock);
1961
1962 return (cp);
1963 }
1964
1965 rfs4_client_t *
rfs4_findclient_by_id(clientid4 clientid,bool_t find_unconfirmed)1966 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1967 {
1968 rfs4_client_t *cp;
1969 bool_t create = FALSE;
1970 cid *cidp = (cid *)&clientid;
1971 nfs4_srv_t *nsrv4 = nfs4_get_srv();
1972
1973 /* If we're a cluster and the nodeid isn't right, short-circuit */
1974 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1975 return (NULL);
1976
1977 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
1978
1979 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx, &clientid,
1980 &create, NULL, RFS4_DBS_VALID);
1981
1982 rw_exit(&nsrv4->rfs4_findclient_lock);
1983
1984 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1985 rfs4_client_rele(cp);
1986 return (NULL);
1987 } else {
1988 return (cp);
1989 }
1990 }
1991
1992 static uint32_t
clntip_hash(void * key)1993 clntip_hash(void *key)
1994 {
1995 struct sockaddr *addr = key;
1996 int i, len = 0;
1997 uint32_t hash = 0;
1998 char *ptr;
1999
2000 if (addr->sa_family == AF_INET) {
2001 struct sockaddr_in *a = (struct sockaddr_in *)addr;
2002 len = sizeof (struct in_addr);
2003 ptr = (char *)&a->sin_addr;
2004 } else if (addr->sa_family == AF_INET6) {
2005 struct sockaddr_in6 *a = (struct sockaddr_in6 *)addr;
2006 len = sizeof (struct in6_addr);
2007 ptr = (char *)&a->sin6_addr;
2008 } else
2009 return (0);
2010
2011 for (i = 0; i < len; i++) {
2012 hash <<= 1;
2013 hash += (uint_t)ptr[i];
2014 }
2015 return (hash);
2016 }
2017
2018 static bool_t
clntip_compare(rfs4_entry_t entry,void * key)2019 clntip_compare(rfs4_entry_t entry, void *key)
2020 {
2021 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2022 struct sockaddr *addr = key;
2023 int len = 0;
2024 char *p1, *p2;
2025
2026 if (addr->sa_family == AF_INET) {
2027 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
2028 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
2029 len = sizeof (struct in_addr);
2030 p1 = (char *)&a1->sin_addr;
2031 p2 = (char *)&a2->sin_addr;
2032 } else if (addr->sa_family == AF_INET6) {
2033 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
2034 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
2035 len = sizeof (struct in6_addr);
2036 p1 = (char *)&a1->sin6_addr;
2037 p2 = (char *)&a2->sin6_addr;
2038 } else
2039 return (0);
2040
2041 return (bcmp(p1, p2, len) == 0);
2042 }
2043
2044 static void *
clntip_mkkey(rfs4_entry_t entry)2045 clntip_mkkey(rfs4_entry_t entry)
2046 {
2047 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
2048
2049 return (&cp->ri_addr);
2050 }
2051
2052 static bool_t
rfs4_clntip_expiry(rfs4_entry_t u_entry)2053 rfs4_clntip_expiry(rfs4_entry_t u_entry)
2054 {
2055 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2056
2057 if (rfs4_dbe_is_invalid(cp->ri_dbe))
2058 return (TRUE);
2059 return (FALSE);
2060 }
2061
/*
 * Destroy callback for the client IP table.  Intentionally empty:
 * rfs4_clntip_create() allocates nothing that would need to be
 * released here.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
2067
2068 static bool_t
rfs4_clntip_create(rfs4_entry_t u_entry,void * arg)2069 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
2070 {
2071 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
2072 struct sockaddr *ca = (struct sockaddr *)arg;
2073
2074 /* Copy client's IP address */
2075 if (ca->sa_family == AF_INET)
2076 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
2077 else if (ca->sa_family == AF_INET6)
2078 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
2079 else
2080 return (FALSE);
2081 cp->ri_no_referrals = 1;
2082
2083 return (TRUE);
2084 }
2085
2086 rfs4_clntip_t *
rfs4_find_clntip(struct sockaddr * addr,bool_t * create)2087 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
2088 {
2089 rfs4_clntip_t *cp;
2090 nfs4_srv_t *nsrv4;
2091
2092 nsrv4 = nfs4_get_srv();
2093
2094 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2095
2096 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2097 create, addr, RFS4_DBS_VALID);
2098
2099 rw_exit(&nsrv4->rfs4_findclient_lock);
2100
2101 return (cp);
2102 }
2103
2104 void
rfs4_invalidate_clntip(struct sockaddr * addr)2105 rfs4_invalidate_clntip(struct sockaddr *addr)
2106 {
2107 rfs4_clntip_t *cp;
2108 bool_t create = FALSE;
2109 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2110
2111 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2112
2113 cp = (rfs4_clntip_t *)rfs4_dbsearch(nsrv4->rfs4_clntip_idx, addr,
2114 &create, NULL, RFS4_DBS_VALID);
2115 if (cp == NULL) {
2116 rw_exit(&nsrv4->rfs4_findclient_lock);
2117 return;
2118 }
2119 rfs4_dbe_invalidate(cp->ri_dbe);
2120 rfs4_dbe_rele(cp->ri_dbe);
2121
2122 rw_exit(&nsrv4->rfs4_findclient_lock);
2123 }
2124
2125 bool_t
rfs4_lease_expired(rfs4_client_t * cp)2126 rfs4_lease_expired(rfs4_client_t *cp)
2127 {
2128 bool_t rc;
2129
2130 rfs4_dbe_lock(cp->rc_dbe);
2131
2132 /*
2133 * If the admin has executed clear_locks for this
2134 * client id, force expire will be set, so no need
2135 * to calculate anything because it's "outa here".
2136 */
2137 if (cp->rc_forced_expire) {
2138 rc = TRUE;
2139 } else {
2140 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
2141 }
2142
2143 /*
2144 * If the lease has expired we will also want
2145 * to remove any stable storage state data. So
2146 * mark the client id accordingly.
2147 */
2148 if (!cp->rc_ss_remove)
2149 cp->rc_ss_remove = (rc == TRUE);
2150
2151 rfs4_dbe_unlock(cp->rc_dbe);
2152
2153 return (rc);
2154 }
2155
2156 void
rfs4_update_lease(rfs4_client_t * cp)2157 rfs4_update_lease(rfs4_client_t *cp)
2158 {
2159 rfs4_dbe_lock(cp->rc_dbe);
2160 if (!cp->rc_forced_expire)
2161 cp->rc_last_access = gethrestime_sec();
2162 rfs4_dbe_unlock(cp->rc_dbe);
2163 }
2164
2165
2166 static bool_t
EQOPENOWNER(open_owner4 * a,open_owner4 * b)2167 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
2168 {
2169 bool_t rc;
2170
2171 if (a->clientid != b->clientid)
2172 return (FALSE);
2173
2174 if (a->owner_len != b->owner_len)
2175 return (FALSE);
2176
2177 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
2178
2179 return (rc);
2180 }
2181
2182 static uint_t
openowner_hash(void * key)2183 openowner_hash(void *key)
2184 {
2185 int i;
2186 open_owner4 *openowner = key;
2187 uint_t hash = 0;
2188
2189 for (i = 0; i < openowner->owner_len; i++) {
2190 hash <<= 4;
2191 hash += (uint_t)openowner->owner_val[i];
2192 }
2193 hash += (uint_t)openowner->clientid;
2194 hash |= (openowner->clientid >> 32);
2195
2196 return (hash);
2197 }
2198
2199 static bool_t
openowner_compare(rfs4_entry_t u_entry,void * key)2200 openowner_compare(rfs4_entry_t u_entry, void *key)
2201 {
2202 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2203 open_owner4 *arg = key;
2204
2205 return (EQOPENOWNER(&oo->ro_owner, arg));
2206 }
2207
2208 void *
openowner_mkkey(rfs4_entry_t u_entry)2209 openowner_mkkey(rfs4_entry_t u_entry)
2210 {
2211 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2212
2213 return (&oo->ro_owner);
2214 }
2215
2216 /* ARGSUSED */
2217 static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)2218 rfs4_openowner_expiry(rfs4_entry_t u_entry)
2219 {
2220 /* openstateid held us and did all needed delay */
2221 return (TRUE);
2222 }
2223
2224 static void
rfs4_openowner_destroy(rfs4_entry_t u_entry)2225 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2226 {
2227 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2228
2229 /* Remove open owner from client's lists of open owners */
2230 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2231 list_remove(&oo->ro_client->rc_openownerlist, oo);
2232 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2233
2234 /* One less reference to the client */
2235 rfs4_client_rele(oo->ro_client);
2236 oo->ro_client = NULL;
2237
2238 /* Free the last reply for this lock owner */
2239 rfs4_free_reply(&oo->ro_reply);
2240
2241 if (oo->ro_reply_fh.nfs_fh4_val) {
2242 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2243 oo->ro_reply_fh.nfs_fh4_len);
2244 oo->ro_reply_fh.nfs_fh4_val = NULL;
2245 oo->ro_reply_fh.nfs_fh4_len = 0;
2246 }
2247
2248 rfs4_sw_destroy(&oo->ro_sw);
2249 list_destroy(&oo->ro_statelist);
2250
2251 /* Free the lock owner id */
2252 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2253 }
2254
2255 void
rfs4_openowner_rele(rfs4_openowner_t * oo)2256 rfs4_openowner_rele(rfs4_openowner_t *oo)
2257 {
2258 rfs4_dbe_rele(oo->ro_dbe);
2259 }
2260
2261 static bool_t
rfs4_openowner_create(rfs4_entry_t u_entry,void * arg)2262 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2263 {
2264 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2265 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2266 open_owner4 *openowner = &argp->ro_owner;
2267 seqid4 seqid = argp->ro_open_seqid;
2268 rfs4_client_t *cp;
2269 bool_t create = FALSE;
2270 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2271
2272 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2273
2274 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2275 &openowner->clientid,
2276 &create, NULL, RFS4_DBS_VALID);
2277
2278 rw_exit(&nsrv4->rfs4_findclient_lock);
2279
2280 if (cp == NULL)
2281 return (FALSE);
2282
2283 oo->ro_reply_fh.nfs_fh4_len = 0;
2284 oo->ro_reply_fh.nfs_fh4_val = NULL;
2285
2286 oo->ro_owner.clientid = openowner->clientid;
2287 oo->ro_owner.owner_val =
2288 kmem_alloc(openowner->owner_len, KM_SLEEP);
2289
2290 bcopy(openowner->owner_val,
2291 oo->ro_owner.owner_val, openowner->owner_len);
2292
2293 oo->ro_owner.owner_len = openowner->owner_len;
2294
2295 oo->ro_need_confirm = TRUE;
2296
2297 rfs4_sw_init(&oo->ro_sw);
2298
2299 oo->ro_open_seqid = seqid;
2300 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2301 oo->ro_client = cp;
2302
2303 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2304 offsetof(rfs4_state_t, rs_node));
2305
2306 /* Insert openowner into client's open owner list */
2307 rfs4_dbe_lock(cp->rc_dbe);
2308 list_insert_tail(&cp->rc_openownerlist, oo);
2309 rfs4_dbe_unlock(cp->rc_dbe);
2310
2311 return (TRUE);
2312 }
2313
2314 rfs4_openowner_t *
rfs4_findopenowner(open_owner4 * openowner,bool_t * create,seqid4 seqid)2315 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2316 {
2317 rfs4_openowner_t *oo;
2318 rfs4_openowner_t arg;
2319 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2320
2321 arg.ro_owner = *openowner;
2322 arg.ro_open_seqid = seqid;
2323 /* CSTYLED */
2324 oo = (rfs4_openowner_t *)rfs4_dbsearch(nsrv4->rfs4_openowner_idx, openowner,
2325 create, &arg, RFS4_DBS_VALID);
2326
2327 return (oo);
2328 }
2329
2330 void
rfs4_update_open_sequence(rfs4_openowner_t * oo)2331 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2332 {
2333
2334 rfs4_dbe_lock(oo->ro_dbe);
2335
2336 oo->ro_open_seqid++;
2337
2338 rfs4_dbe_unlock(oo->ro_dbe);
2339 }
2340
2341 void
rfs4_update_open_resp(rfs4_openowner_t * oo,nfs_resop4 * resp,nfs_fh4 * fh)2342 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2343 {
2344
2345 rfs4_dbe_lock(oo->ro_dbe);
2346
2347 rfs4_free_reply(&oo->ro_reply);
2348
2349 rfs4_copy_reply(&oo->ro_reply, resp);
2350
2351 /* Save the filehandle if provided and free if not used */
2352 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2353 fh && fh->nfs_fh4_len) {
2354 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2355 oo->ro_reply_fh.nfs_fh4_val =
2356 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2357 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2358 } else {
2359 if (oo->ro_reply_fh.nfs_fh4_val) {
2360 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2361 oo->ro_reply_fh.nfs_fh4_len);
2362 oo->ro_reply_fh.nfs_fh4_val = NULL;
2363 oo->ro_reply_fh.nfs_fh4_len = 0;
2364 }
2365 }
2366
2367 rfs4_dbe_unlock(oo->ro_dbe);
2368 }
2369
2370 static bool_t
lockowner_compare(rfs4_entry_t u_entry,void * key)2371 lockowner_compare(rfs4_entry_t u_entry, void *key)
2372 {
2373 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2374 lock_owner4 *b = (lock_owner4 *)key;
2375
2376 if (lo->rl_owner.clientid != b->clientid)
2377 return (FALSE);
2378
2379 if (lo->rl_owner.owner_len != b->owner_len)
2380 return (FALSE);
2381
2382 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2383 lo->rl_owner.owner_len) == 0);
2384 }
2385
2386 void *
lockowner_mkkey(rfs4_entry_t u_entry)2387 lockowner_mkkey(rfs4_entry_t u_entry)
2388 {
2389 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2390
2391 return (&lo->rl_owner);
2392 }
2393
2394 static uint32_t
lockowner_hash(void * key)2395 lockowner_hash(void *key)
2396 {
2397 int i;
2398 lock_owner4 *lockowner = key;
2399 uint_t hash = 0;
2400
2401 for (i = 0; i < lockowner->owner_len; i++) {
2402 hash <<= 4;
2403 hash += (uint_t)lockowner->owner_val[i];
2404 }
2405 hash += (uint_t)lockowner->clientid;
2406 hash |= (lockowner->clientid >> 32);
2407
2408 return (hash);
2409 }
2410
/*
 * Hash callback for the lock-owner-by-pid index.  The key is a pid
 * carried in a pointer-sized value; the hash is that value truncated
 * to 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2416
2417 static void *
pid_mkkey(rfs4_entry_t u_entry)2418 pid_mkkey(rfs4_entry_t u_entry)
2419 {
2420 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2421
2422 return ((void *)(uintptr_t)lo->rl_pid);
2423 }
2424
2425 static bool_t
pid_compare(rfs4_entry_t u_entry,void * key)2426 pid_compare(rfs4_entry_t u_entry, void *key)
2427 {
2428 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2429
2430 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2431 }
2432
2433 static void
rfs4_lockowner_destroy(rfs4_entry_t u_entry)2434 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2435 {
2436 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2437
2438 /* Free the lock owner id */
2439 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2440 rfs4_client_rele(lo->rl_client);
2441 }
2442
2443 void
rfs4_lockowner_rele(rfs4_lockowner_t * lo)2444 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2445 {
2446 rfs4_dbe_rele(lo->rl_dbe);
2447 }
2448
2449 /* ARGSUSED */
2450 static bool_t
rfs4_lockowner_expiry(rfs4_entry_t u_entry)2451 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2452 {
2453 /*
2454 * Since expiry is called with no other references on
2455 * this struct, go ahead and have it removed.
2456 */
2457 return (TRUE);
2458 }
2459
2460 static bool_t
rfs4_lockowner_create(rfs4_entry_t u_entry,void * arg)2461 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2462 {
2463 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2464 lock_owner4 *lockowner = (lock_owner4 *)arg;
2465 rfs4_client_t *cp;
2466 bool_t create = FALSE;
2467 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2468
2469 rw_enter(&nsrv4->rfs4_findclient_lock, RW_READER);
2470
2471 cp = (rfs4_client_t *)rfs4_dbsearch(nsrv4->rfs4_clientid_idx,
2472 &lockowner->clientid,
2473 &create, NULL, RFS4_DBS_VALID);
2474
2475 rw_exit(&nsrv4->rfs4_findclient_lock);
2476
2477 if (cp == NULL)
2478 return (FALSE);
2479
2480 /* Reference client */
2481 lo->rl_client = cp;
2482 lo->rl_owner.clientid = lockowner->clientid;
2483 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2484 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2485 lockowner->owner_len);
2486 lo->rl_owner.owner_len = lockowner->owner_len;
2487 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2488
2489 return (TRUE);
2490 }
2491
2492 rfs4_lockowner_t *
rfs4_findlockowner(lock_owner4 * lockowner,bool_t * create)2493 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2494 {
2495 rfs4_lockowner_t *lo;
2496 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2497
2498 /* CSTYLED */
2499 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_idx, lockowner,
2500 create, lockowner, RFS4_DBS_VALID);
2501
2502 return (lo);
2503 }
2504
2505 rfs4_lockowner_t *
rfs4_findlockowner_by_pid(pid_t pid)2506 rfs4_findlockowner_by_pid(pid_t pid)
2507 {
2508 rfs4_lockowner_t *lo;
2509 bool_t create = FALSE;
2510 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2511
2512 lo = (rfs4_lockowner_t *)rfs4_dbsearch(nsrv4->rfs4_lockowner_pid_idx,
2513 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2514
2515 return (lo);
2516 }
2517
2518
2519 static uint32_t
file_hash(void * key)2520 file_hash(void *key)
2521 {
2522 return (ADDRHASH(key));
2523 }
2524
2525 static void *
file_mkkey(rfs4_entry_t u_entry)2526 file_mkkey(rfs4_entry_t u_entry)
2527 {
2528 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2529
2530 return (fp->rf_vp);
2531 }
2532
2533 static bool_t
file_compare(rfs4_entry_t u_entry,void * key)2534 file_compare(rfs4_entry_t u_entry, void *key)
2535 {
2536 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2537
2538 return (fp->rf_vp == (vnode_t *)key);
2539 }
2540
2541 static void
rfs4_file_destroy(rfs4_entry_t u_entry)2542 rfs4_file_destroy(rfs4_entry_t u_entry)
2543 {
2544 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2545
2546 list_destroy(&fp->rf_delegstatelist);
2547
2548 if (fp->rf_filehandle.nfs_fh4_val)
2549 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2550 fp->rf_filehandle.nfs_fh4_len);
2551 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2552 if (fp->rf_vp) {
2553 vnode_t *vp = fp->rf_vp;
2554
2555 mutex_enter(&vp->v_vsd_lock);
2556 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2557 mutex_exit(&vp->v_vsd_lock);
2558 VN_RELE(vp);
2559 fp->rf_vp = NULL;
2560 }
2561 rw_destroy(&fp->rf_file_rwlock);
2562 }
2563
2564 /*
2565 * Used to unlock the underlying dbe struct only
2566 */
2567 void
rfs4_file_rele(rfs4_file_t * fp)2568 rfs4_file_rele(rfs4_file_t *fp)
2569 {
2570 rfs4_dbe_rele(fp->rf_dbe);
2571 }
2572
/*
 * Creation argument handed to rfs4_file_create() through
 * rfs4_dbsearch() by rfs4_findfile() and rfs4_findfile_withlock().
 */
typedef struct {
	vnode_t *vp;	/* vnode the new file entry holds and is keyed by */
	nfs_fh4 *fh;	/* filehandle copied into the new file entry */
} rfs4_fcreate_arg;
2577
2578 static bool_t
rfs4_file_create(rfs4_entry_t u_entry,void * arg)2579 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2580 {
2581 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2582 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2583 vnode_t *vp = ap->vp;
2584 nfs_fh4 *fh = ap->fh;
2585
2586 VN_HOLD(vp);
2587
2588 fp->rf_filehandle.nfs_fh4_len = 0;
2589 fp->rf_filehandle.nfs_fh4_val = NULL;
2590 ASSERT(fh && fh->nfs_fh4_len);
2591 if (fh && fh->nfs_fh4_len) {
2592 fp->rf_filehandle.nfs_fh4_val =
2593 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2594 nfs_fh4_copy(fh, &fp->rf_filehandle);
2595 }
2596 fp->rf_vp = vp;
2597
2598 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2599 offsetof(rfs4_deleg_state_t, rds_node));
2600
2601 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2602 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2603
2604 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2605 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2606
2607 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2608
2609 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2610
2611 mutex_enter(&vp->v_vsd_lock);
2612 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2613 mutex_exit(&vp->v_vsd_lock);
2614
2615 return (TRUE);
2616 }
2617
2618 rfs4_file_t *
rfs4_findfile(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2619 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2620 {
2621 rfs4_file_t *fp;
2622 rfs4_fcreate_arg arg;
2623 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2624
2625 arg.vp = vp;
2626 arg.fh = fh;
2627
2628 if (*create == TRUE)
2629 /* CSTYLED */
2630 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp, create,
2631 &arg, RFS4_DBS_VALID);
2632 else {
2633 mutex_enter(&vp->v_vsd_lock);
2634 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2635 if (fp) {
2636 rfs4_dbe_lock(fp->rf_dbe);
2637 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2638 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2639 rfs4_dbe_unlock(fp->rf_dbe);
2640 fp = NULL;
2641 } else {
2642 rfs4_dbe_hold(fp->rf_dbe);
2643 rfs4_dbe_unlock(fp->rf_dbe);
2644 }
2645 }
2646 mutex_exit(&vp->v_vsd_lock);
2647 }
2648 return (fp);
2649 }
2650
2651 /*
2652 * Find a file in the db and once it is located, take the rw lock.
2653 * Need to check the vnode pointer and if it does not exist (it was
2654 * removed between the db location and check) redo the find. This
2655 * assumes that a file struct that has a NULL vnode pointer is marked
2656 * at 'invalid' and will not be found in the db the second time
2657 * around.
2658 */
2659 rfs4_file_t *
rfs4_findfile_withlock(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2660 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2661 {
2662 rfs4_file_t *fp;
2663 rfs4_fcreate_arg arg;
2664 bool_t screate = *create;
2665 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2666
2667 if (screate == FALSE) {
2668 mutex_enter(&vp->v_vsd_lock);
2669 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2670 if (fp) {
2671 rfs4_dbe_lock(fp->rf_dbe);
2672 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2673 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2674 rfs4_dbe_unlock(fp->rf_dbe);
2675 mutex_exit(&vp->v_vsd_lock);
2676 fp = NULL;
2677 } else {
2678 rfs4_dbe_hold(fp->rf_dbe);
2679 rfs4_dbe_unlock(fp->rf_dbe);
2680 mutex_exit(&vp->v_vsd_lock);
2681 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2682 if (fp->rf_vp == NULL) {
2683 rw_exit(&fp->rf_file_rwlock);
2684 rfs4_file_rele(fp);
2685 fp = NULL;
2686 }
2687 }
2688 } else {
2689 mutex_exit(&vp->v_vsd_lock);
2690 }
2691 } else {
2692 retry:
2693 arg.vp = vp;
2694 arg.fh = fh;
2695
2696 fp = (rfs4_file_t *)rfs4_dbsearch(nsrv4->rfs4_file_idx, vp,
2697 create, &arg, RFS4_DBS_VALID);
2698 if (fp != NULL) {
2699 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2700 if (fp->rf_vp == NULL) {
2701 rw_exit(&fp->rf_file_rwlock);
2702 rfs4_file_rele(fp);
2703 *create = screate;
2704 goto retry;
2705 }
2706 }
2707 }
2708
2709 return (fp);
2710 }
2711
2712 static uint32_t
lo_state_hash(void * key)2713 lo_state_hash(void *key)
2714 {
2715 stateid_t *id = key;
2716
2717 return (id->bits.ident+id->bits.pid);
2718 }
2719
2720 static bool_t
lo_state_compare(rfs4_entry_t u_entry,void * key)2721 lo_state_compare(rfs4_entry_t u_entry, void *key)
2722 {
2723 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2724 stateid_t *id = key;
2725 bool_t rc;
2726
2727 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2728 lsp->rls_lockid.bits.type == id->bits.type &&
2729 lsp->rls_lockid.bits.ident == id->bits.ident &&
2730 lsp->rls_lockid.bits.pid == id->bits.pid);
2731
2732 return (rc);
2733 }
2734
2735 static void *
lo_state_mkkey(rfs4_entry_t u_entry)2736 lo_state_mkkey(rfs4_entry_t u_entry)
2737 {
2738 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2739
2740 return (&lsp->rls_lockid);
2741 }
2742
2743 static bool_t
rfs4_lo_state_expiry(rfs4_entry_t u_entry)2744 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2745 {
2746 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2747
2748 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2749 return (TRUE);
2750 if (lsp->rls_state->rs_closed)
2751 return (TRUE);
2752 return ((gethrestime_sec() -
2753 lsp->rls_state->rs_owner->ro_client->rc_last_access
2754 > rfs4_lease_time));
2755 }
2756
2757 static void
rfs4_lo_state_destroy(rfs4_entry_t u_entry)2758 rfs4_lo_state_destroy(rfs4_entry_t u_entry)
2759 {
2760 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2761
2762 rfs4_dbe_lock(lsp->rls_state->rs_dbe);
2763 list_remove(&lsp->rls_state->rs_lostatelist, lsp);
2764 rfs4_dbe_unlock(lsp->rls_state->rs_dbe);
2765
2766 rfs4_sw_destroy(&lsp->rls_sw);
2767
2768 /* Make sure to release the file locks */
2769 if (lsp->rls_locks_cleaned == FALSE) {
2770 lsp->rls_locks_cleaned = TRUE;
2771 if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
2772 /* Is the PxFS kernel module loaded? */
2773 if (lm_remove_file_locks != NULL) {
2774 int new_sysid;
2775
2776 /* Encode the cluster nodeid in new sysid */
2777 new_sysid =
2778 lsp->rls_locker->rl_client->rc_sysidt;
2779 lm_set_nlmid_flk(&new_sysid);
2780
2781 /*
2782 * This PxFS routine removes file locks for a
2783 * client over all nodes of a cluster.
2784 */
2785 DTRACE_PROBE1(nfss_i_clust_rm_lck,
2786 int, new_sysid);
2787 (*lm_remove_file_locks)(new_sysid);
2788 } else {
2789 (void) cleanlocks(
2790 lsp->rls_state->rs_finfo->rf_vp,
2791 lsp->rls_locker->rl_pid,
2792 lsp->rls_locker->rl_client->rc_sysidt);
2793 }
2794 }
2795 }
2796
2797 /* Free the last reply for this state */
2798 rfs4_free_reply(&lsp->rls_reply);
2799
2800 rfs4_lockowner_rele(lsp->rls_locker);
2801 lsp->rls_locker = NULL;
2802
2803 rfs4_state_rele_nounlock(lsp->rls_state);
2804 lsp->rls_state = NULL;
2805 }
2806
2807 static bool_t
rfs4_lo_state_create(rfs4_entry_t u_entry,void * arg)2808 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2809 {
2810 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2811 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2812 rfs4_lockowner_t *lo = argp->rls_locker;
2813 rfs4_state_t *sp = argp->rls_state;
2814
2815 lsp->rls_state = sp;
2816
2817 lsp->rls_lockid = sp->rs_stateid;
2818 lsp->rls_lockid.bits.type = LOCKID;
2819 lsp->rls_lockid.bits.chgseq = 0;
2820 lsp->rls_lockid.bits.pid = lo->rl_pid;
2821
2822 lsp->rls_locks_cleaned = FALSE;
2823 lsp->rls_lock_completed = FALSE;
2824
2825 rfs4_sw_init(&lsp->rls_sw);
2826
2827 /* Attached the supplied lock owner */
2828 rfs4_dbe_hold(lo->rl_dbe);
2829 lsp->rls_locker = lo;
2830
2831 rfs4_dbe_lock(sp->rs_dbe);
2832 list_insert_tail(&sp->rs_lostatelist, lsp);
2833 rfs4_dbe_hold(sp->rs_dbe);
2834 rfs4_dbe_unlock(sp->rs_dbe);
2835
2836 return (TRUE);
2837 }
2838
2839 void
rfs4_lo_state_rele(rfs4_lo_state_t * lsp,bool_t unlock_fp)2840 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2841 {
2842 if (unlock_fp == TRUE)
2843 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2844 rfs4_dbe_rele(lsp->rls_dbe);
2845 }
2846
2847 static rfs4_lo_state_t *
rfs4_findlo_state(stateid_t * id,bool_t lock_fp)2848 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2849 {
2850 rfs4_lo_state_t *lsp;
2851 bool_t create = FALSE;
2852 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2853
2854 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_idx, id,
2855 &create, NULL, RFS4_DBS_VALID);
2856 if (lock_fp == TRUE && lsp != NULL)
2857 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2858
2859 return (lsp);
2860 }
2861
2862
2863 static uint32_t
lo_state_lo_hash(void * key)2864 lo_state_lo_hash(void *key)
2865 {
2866 rfs4_lo_state_t *lsp = key;
2867
2868 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2869 }
2870
2871 static bool_t
lo_state_lo_compare(rfs4_entry_t u_entry,void * key)2872 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2873 {
2874 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2875 rfs4_lo_state_t *keyp = key;
2876
2877 return (keyp->rls_locker == lsp->rls_locker &&
2878 keyp->rls_state == lsp->rls_state);
2879 }
2880
2881 static void *
lo_state_lo_mkkey(rfs4_entry_t u_entry)2882 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2883 {
2884 return (u_entry);
2885 }
2886
2887 rfs4_lo_state_t *
rfs4_findlo_state_by_owner(rfs4_lockowner_t * lo,rfs4_state_t * sp,bool_t * create)2888 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2889 bool_t *create)
2890 {
2891 rfs4_lo_state_t *lsp;
2892 rfs4_lo_state_t arg;
2893 nfs4_srv_t *nsrv4 = nfs4_get_srv();
2894
2895 arg.rls_locker = lo;
2896 arg.rls_state = sp;
2897
2898 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(nsrv4->rfs4_lo_state_owner_idx,
2899 &arg, create, &arg, RFS4_DBS_VALID);
2900
2901 return (lsp);
2902 }
2903
2904 static stateid_t
get_stateid(id_t eid)2905 get_stateid(id_t eid)
2906 {
2907 stateid_t id;
2908 nfs4_srv_t *nsrv4;
2909
2910 nsrv4 = nfs4_get_srv();
2911
2912 id.bits.boottime = nsrv4->rfs4_start_time;
2913 id.bits.ident = eid;
2914 id.bits.chgseq = 0;
2915 id.bits.type = 0;
2916 id.bits.pid = 0;
2917
2918 /*
2919 * If we are booted as a cluster node, embed our nodeid.
2920 * We've already done sanity checks in rfs4_client_create() so no
2921 * need to repeat them here.
2922 */
2923 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2924 clconf_get_nodeid() : 0;
2925
2926 return (id);
2927 }
2928
2929 /*
2930 * For use only when booted as a cluster node.
2931 * Returns TRUE if the embedded nodeid indicates that this stateid was
2932 * generated on another node.
2933 */
2934 static int
foreign_stateid(stateid_t * id)2935 foreign_stateid(stateid_t *id)
2936 {
2937 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2938 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2939 }
2940
2941 /*
2942 * For use only when booted as a cluster node.
2943 * Returns TRUE if the embedded nodeid indicates that this clientid was
2944 * generated on another node.
2945 */
2946 static int
foreign_clientid(cid * cidp)2947 foreign_clientid(cid *cidp)
2948 {
2949 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2950 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2951 (uint32_t)clconf_get_nodeid());
2952 }
2953
2954 /*
2955 * For use only when booted as a cluster node.
2956 * Embed our cluster nodeid into the clientid.
2957 */
2958 static void
embed_nodeid(cid * cidp)2959 embed_nodeid(cid *cidp)
2960 {
2961 int clnodeid;
2962 /*
2963 * Currently, our state tables are small enough that their
2964 * ids will leave enough bits free for the nodeid. If the
2965 * tables become larger, we mustn't overwrite the id.
2966 * Equally, we only have room for so many bits of nodeid, so
2967 * must check that too.
2968 */
2969 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2970 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2971 clnodeid = clconf_get_nodeid();
2972 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2973 ASSERT(clnodeid != NODEID_UNKNOWN);
2974 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2975 }
2976
2977 static uint32_t
state_hash(void * key)2978 state_hash(void *key)
2979 {
2980 stateid_t *ip = (stateid_t *)key;
2981
2982 return (ip->bits.ident);
2983 }
2984
2985 static bool_t
state_compare(rfs4_entry_t u_entry,void * key)2986 state_compare(rfs4_entry_t u_entry, void *key)
2987 {
2988 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2989 stateid_t *id = (stateid_t *)key;
2990 bool_t rc;
2991
2992 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2993 sp->rs_stateid.bits.ident == id->bits.ident);
2994
2995 return (rc);
2996 }
2997
2998 static void *
state_mkkey(rfs4_entry_t u_entry)2999 state_mkkey(rfs4_entry_t u_entry)
3000 {
3001 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3002
3003 return (&sp->rs_stateid);
3004 }
3005
3006 static void
rfs4_state_destroy(rfs4_entry_t u_entry)3007 rfs4_state_destroy(rfs4_entry_t u_entry)
3008 {
3009 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3010
3011 /* remove from openowner list */
3012 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
3013 list_remove(&sp->rs_owner->ro_statelist, sp);
3014 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
3015
3016 list_destroy(&sp->rs_lostatelist);
3017
3018 /* release any share locks for this stateid if it's still open */
3019 if (!sp->rs_closed) {
3020 rfs4_dbe_lock(sp->rs_dbe);
3021 (void) rfs4_unshare(sp);
3022 rfs4_dbe_unlock(sp->rs_dbe);
3023 }
3024
3025 /* Were done with the file */
3026 rfs4_file_rele(sp->rs_finfo);
3027 sp->rs_finfo = NULL;
3028
3029 /* And now with the openowner */
3030 rfs4_openowner_rele(sp->rs_owner);
3031 sp->rs_owner = NULL;
3032 }
3033
3034 void
rfs4_state_rele_nounlock(rfs4_state_t * sp)3035 rfs4_state_rele_nounlock(rfs4_state_t *sp)
3036 {
3037 rfs4_dbe_rele(sp->rs_dbe);
3038 }
3039
3040 void
rfs4_state_rele(rfs4_state_t * sp)3041 rfs4_state_rele(rfs4_state_t *sp)
3042 {
3043 rw_exit(&sp->rs_finfo->rf_file_rwlock);
3044 rfs4_dbe_rele(sp->rs_dbe);
3045 }
3046
3047 static uint32_t
deleg_hash(void * key)3048 deleg_hash(void *key)
3049 {
3050 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
3051
3052 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
3053 }
3054
3055 static bool_t
deleg_compare(rfs4_entry_t u_entry,void * key)3056 deleg_compare(rfs4_entry_t u_entry, void *key)
3057 {
3058 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3059 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
3060
3061 return (dsp->rds_client == kdsp->rds_client &&
3062 dsp->rds_finfo == kdsp->rds_finfo);
3063 }
3064
3065 static void *
deleg_mkkey(rfs4_entry_t u_entry)3066 deleg_mkkey(rfs4_entry_t u_entry)
3067 {
3068 return (u_entry);
3069 }
3070
3071 static uint32_t
deleg_state_hash(void * key)3072 deleg_state_hash(void *key)
3073 {
3074 stateid_t *ip = (stateid_t *)key;
3075
3076 return (ip->bits.ident);
3077 }
3078
3079 static bool_t
deleg_state_compare(rfs4_entry_t u_entry,void * key)3080 deleg_state_compare(rfs4_entry_t u_entry, void *key)
3081 {
3082 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3083 stateid_t *id = (stateid_t *)key;
3084 bool_t rc;
3085
3086 if (id->bits.type != DELEGID)
3087 return (FALSE);
3088
3089 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
3090 dsp->rds_delegid.bits.ident == id->bits.ident);
3091
3092 return (rc);
3093 }
3094
3095 static void *
deleg_state_mkkey(rfs4_entry_t u_entry)3096 deleg_state_mkkey(rfs4_entry_t u_entry)
3097 {
3098 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3099
3100 return (&dsp->rds_delegid);
3101 }
3102
3103 static bool_t
rfs4_deleg_state_expiry(rfs4_entry_t u_entry)3104 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
3105 {
3106 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3107
3108 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
3109 return (TRUE);
3110
3111 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
3112 return (TRUE);
3113
3114 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
3115 > rfs4_lease_time)) {
3116 rfs4_dbe_invalidate(dsp->rds_dbe);
3117 return (TRUE);
3118 }
3119
3120 return (FALSE);
3121 }
3122
3123 static bool_t
rfs4_deleg_state_create(rfs4_entry_t u_entry,void * argp)3124 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
3125 {
3126 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3127 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
3128 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
3129
3130 rfs4_dbe_hold(fp->rf_dbe);
3131 rfs4_dbe_hold(cp->rc_dbe);
3132
3133 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
3134 dsp->rds_delegid.bits.type = DELEGID;
3135 dsp->rds_finfo = fp;
3136 dsp->rds_client = cp;
3137 dsp->rds_dtype = OPEN_DELEGATE_NONE;
3138
3139 dsp->rds_time_granted = gethrestime_sec(); /* observability */
3140 dsp->rds_time_revoked = 0;
3141
3142 list_link_init(&dsp->rds_node);
3143
3144 return (TRUE);
3145 }
3146
3147 static void
rfs4_deleg_state_destroy(rfs4_entry_t u_entry)3148 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
3149 {
3150 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3151
3152 /* return delegation if necessary */
3153 rfs4_return_deleg(dsp, FALSE);
3154
3155 /* Were done with the file */
3156 rfs4_file_rele(dsp->rds_finfo);
3157 dsp->rds_finfo = NULL;
3158
3159 /* And now with the openowner */
3160 rfs4_client_rele(dsp->rds_client);
3161 dsp->rds_client = NULL;
3162 }
3163
3164 rfs4_deleg_state_t *
rfs4_finddeleg(rfs4_state_t * sp,bool_t * create)3165 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
3166 {
3167 rfs4_deleg_state_t ds, *dsp;
3168 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3169
3170 ds.rds_client = sp->rs_owner->ro_client;
3171 ds.rds_finfo = sp->rs_finfo;
3172
3173 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_idx, &ds,
3174 create, &ds, RFS4_DBS_VALID);
3175
3176 return (dsp);
3177 }
3178
3179 rfs4_deleg_state_t *
rfs4_finddelegstate(stateid_t * id)3180 rfs4_finddelegstate(stateid_t *id)
3181 {
3182 rfs4_deleg_state_t *dsp;
3183 bool_t create = FALSE;
3184 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3185
3186 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(nsrv4->rfs4_deleg_state_idx,
3187 id, &create, NULL, RFS4_DBS_VALID);
3188
3189 return (dsp);
3190 }
3191
3192 void
rfs4_deleg_state_rele(rfs4_deleg_state_t * dsp)3193 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
3194 {
3195 rfs4_dbe_rele(dsp->rds_dbe);
3196 }
3197
3198 void
rfs4_update_lock_sequence(rfs4_lo_state_t * lsp)3199 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
3200 {
3201
3202 rfs4_dbe_lock(lsp->rls_dbe);
3203
3204 /*
3205 * If we are skipping sequence id checking, this means that
3206 * this is the first lock request and therefore the sequence
3207 * id does not need to be updated. This only happens on the
3208 * first lock request for a lockowner
3209 */
3210 if (!lsp->rls_skip_seqid_check)
3211 lsp->rls_seqid++;
3212
3213 rfs4_dbe_unlock(lsp->rls_dbe);
3214 }
3215
3216 void
rfs4_update_lock_resp(rfs4_lo_state_t * lsp,nfs_resop4 * resp)3217 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3218 {
3219
3220 rfs4_dbe_lock(lsp->rls_dbe);
3221
3222 rfs4_free_reply(&lsp->rls_reply);
3223
3224 rfs4_copy_reply(&lsp->rls_reply, resp);
3225
3226 rfs4_dbe_unlock(lsp->rls_dbe);
3227 }
3228
3229 void
rfs4_free_opens(rfs4_openowner_t * oo,bool_t invalidate,bool_t close_of_client)3230 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3231 bool_t close_of_client)
3232 {
3233 rfs4_state_t *sp;
3234
3235 rfs4_dbe_lock(oo->ro_dbe);
3236
3237 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3238 sp = list_next(&oo->ro_statelist, sp)) {
3239 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3240 if (invalidate == TRUE)
3241 rfs4_dbe_invalidate(sp->rs_dbe);
3242 }
3243
3244 rfs4_dbe_invalidate(oo->ro_dbe);
3245 rfs4_dbe_unlock(oo->ro_dbe);
3246 }
3247
3248 static uint32_t
state_owner_file_hash(void * key)3249 state_owner_file_hash(void *key)
3250 {
3251 rfs4_state_t *sp = key;
3252
3253 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3254 }
3255
3256 static bool_t
state_owner_file_compare(rfs4_entry_t u_entry,void * key)3257 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3258 {
3259 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3260 rfs4_state_t *arg = key;
3261
3262 if (sp->rs_closed == TRUE)
3263 return (FALSE);
3264
3265 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3266 }
3267
3268 static void *
state_owner_file_mkkey(rfs4_entry_t u_entry)3269 state_owner_file_mkkey(rfs4_entry_t u_entry)
3270 {
3271 return (u_entry);
3272 }
3273
3274 static uint32_t
state_file_hash(void * key)3275 state_file_hash(void *key)
3276 {
3277 return (ADDRHASH(key));
3278 }
3279
3280 static bool_t
state_file_compare(rfs4_entry_t u_entry,void * key)3281 state_file_compare(rfs4_entry_t u_entry, void *key)
3282 {
3283 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3284 rfs4_file_t *fp = key;
3285
3286 if (sp->rs_closed == TRUE)
3287 return (FALSE);
3288
3289 return (fp == sp->rs_finfo);
3290 }
3291
3292 static void *
state_file_mkkey(rfs4_entry_t u_entry)3293 state_file_mkkey(rfs4_entry_t u_entry)
3294 {
3295 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3296
3297 return (sp->rs_finfo);
3298 }
3299
3300 rfs4_state_t *
rfs4_findstate_by_owner_file(rfs4_openowner_t * oo,rfs4_file_t * fp,bool_t * create)3301 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3302 bool_t *create)
3303 {
3304 rfs4_state_t *sp;
3305 rfs4_state_t key;
3306 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3307
3308 key.rs_owner = oo;
3309 key.rs_finfo = fp;
3310
3311 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_owner_file_idx,
3312 &key, create, &key, RFS4_DBS_VALID);
3313
3314 return (sp);
3315 }
3316
3317 /* This returns ANY state struct that refers to this file */
3318 static rfs4_state_t *
rfs4_findstate_by_file(rfs4_file_t * fp)3319 rfs4_findstate_by_file(rfs4_file_t *fp)
3320 {
3321 bool_t create = FALSE;
3322 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3323
3324 return ((rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_file_idx, fp,
3325 &create, fp, RFS4_DBS_VALID));
3326 }
3327
3328 static bool_t
rfs4_state_expiry(rfs4_entry_t u_entry)3329 rfs4_state_expiry(rfs4_entry_t u_entry)
3330 {
3331 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3332
3333 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3334 return (TRUE);
3335
3336 if (sp->rs_closed == TRUE &&
3337 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3338 > rfs4_lease_time))
3339 return (TRUE);
3340
3341 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3342 > rfs4_lease_time));
3343 }
3344
3345 static bool_t
rfs4_state_create(rfs4_entry_t u_entry,void * argp)3346 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3347 {
3348 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3349 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3350 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3351
3352 rfs4_dbe_hold(fp->rf_dbe);
3353 rfs4_dbe_hold(oo->ro_dbe);
3354 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3355 sp->rs_stateid.bits.type = OPENID;
3356 sp->rs_owner = oo;
3357 sp->rs_finfo = fp;
3358
3359 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3360 offsetof(rfs4_lo_state_t, rls_node));
3361
3362 /* Insert state on per open owner's list */
3363 rfs4_dbe_lock(oo->ro_dbe);
3364 list_insert_tail(&oo->ro_statelist, sp);
3365 rfs4_dbe_unlock(oo->ro_dbe);
3366
3367 return (TRUE);
3368 }
3369
3370 static rfs4_state_t *
rfs4_findstate(stateid_t * id,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3371 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3372 {
3373 rfs4_state_t *sp;
3374 bool_t create = FALSE;
3375 nfs4_srv_t *nsrv4 = nfs4_get_srv();
3376
3377 sp = (rfs4_state_t *)rfs4_dbsearch(nsrv4->rfs4_state_idx, id,
3378 &create, NULL, find_invalid);
3379 if (lock_fp == TRUE && sp != NULL)
3380 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3381
3382 return (sp);
3383 }
3384
3385 void
rfs4_state_close(rfs4_state_t * sp,bool_t lock_held,bool_t close_of_client,cred_t * cr)3386 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3387 cred_t *cr)
3388 {
3389 /* Remove the associated lo_state owners */
3390 if (!lock_held)
3391 rfs4_dbe_lock(sp->rs_dbe);
3392
3393 /*
3394 * If refcnt == 0, the dbe is about to be destroyed.
3395 * lock state will be released by the reaper thread.
3396 */
3397
3398 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3399 if (sp->rs_closed == FALSE) {
3400 rfs4_release_share_lock_state(sp, cr, close_of_client);
3401 sp->rs_closed = TRUE;
3402 }
3403 }
3404
3405 if (!lock_held)
3406 rfs4_dbe_unlock(sp->rs_dbe);
3407 }
3408
3409 /*
3410 * Remove all state associated with the given client.
3411 */
3412 void
rfs4_client_state_remove(rfs4_client_t * cp)3413 rfs4_client_state_remove(rfs4_client_t *cp)
3414 {
3415 rfs4_openowner_t *oo;
3416
3417 rfs4_dbe_lock(cp->rc_dbe);
3418
3419 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3420 oo = list_next(&cp->rc_openownerlist, oo)) {
3421 rfs4_free_opens(oo, TRUE, TRUE);
3422 }
3423
3424 rfs4_dbe_unlock(cp->rc_dbe);
3425 }
3426
3427 void
rfs4_client_close(rfs4_client_t * cp)3428 rfs4_client_close(rfs4_client_t *cp)
3429 {
3430 /* Mark client as going away. */
3431 rfs4_dbe_lock(cp->rc_dbe);
3432 rfs4_dbe_invalidate(cp->rc_dbe);
3433 rfs4_dbe_unlock(cp->rc_dbe);
3434
3435 rfs4_client_state_remove(cp);
3436 rfs4x_client_session_remove(cp);
3437
3438 /* Release the client */
3439 rfs4_client_rele(cp);
3440 }
3441
3442 nfsstat4
rfs4_check_clientid(clientid4 * cp,int setclid_confirm)3443 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3444 {
3445 cid *cidp = (cid *) cp;
3446 nfs4_srv_t *nsrv4;
3447
3448 nsrv4 = nfs4_get_srv();
3449
3450 /*
3451 * If we are booted as a cluster node, check the embedded nodeid.
3452 * If it indicates that this clientid was generated on another node,
3453 * inform the client accordingly.
3454 */
3455 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3456 return (NFS4ERR_STALE_CLIENTID);
3457
3458 /*
3459 * If the server start time matches the time provided
3460 * by the client (via the clientid) and this is NOT a
3461 * setclientid_confirm then return EXPIRED.
3462 */
3463 if (!setclid_confirm &&
3464 cidp->impl_id.start_time == nsrv4->rfs4_start_time)
3465 return (NFS4ERR_EXPIRED);
3466
3467 return (NFS4ERR_STALE_CLIENTID);
3468 }
3469
3470 /*
3471 * This is used when a stateid has not been found amongst the
3472 * current server's state. Check the stateid to see if it
3473 * was from this server instantiation or not.
3474 */
3475 static nfsstat4
what_stateid_error(stateid_t * id,stateid_type_t type)3476 what_stateid_error(stateid_t *id, stateid_type_t type)
3477 {
3478 nfs4_srv_t *nsrv4;
3479
3480 nsrv4 = nfs4_get_srv();
3481
3482 /* If we are booted as a cluster node, was stateid locally generated? */
3483 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3484 return (NFS4ERR_STALE_STATEID);
3485
3486 /* If types don't match then no use checking further */
3487 if (type != id->bits.type)
3488 return (NFS4ERR_BAD_STATEID);
3489
3490 /* From a different server instantiation, return STALE */
3491 if (id->bits.boottime != nsrv4->rfs4_start_time)
3492 return (NFS4ERR_STALE_STATEID);
3493
3494 /*
3495 * From this server but the state is most likely beyond lease
3496 * timeout: return NFS4ERR_EXPIRED. However, there is the
3497 * case of a delegation stateid. For delegations, there is a
3498 * case where the state can be removed without the client's
3499 * knowledge/consent: revocation. In the case of delegation
3500 * revocation, the delegation state will be removed and will
3501 * not be found. If the client does something like a
3502 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3503 * that has been revoked, the server should return BAD_STATEID
3504 * instead of the more common EXPIRED error.
3505 */
3506 if (id->bits.boottime == nsrv4->rfs4_start_time) {
3507 if (type == DELEGID)
3508 return (NFS4ERR_BAD_STATEID);
3509 else
3510 return (NFS4ERR_EXPIRED);
3511 }
3512
3513 return (NFS4ERR_BAD_STATEID);
3514 }
3515
3516 /*
3517 * Used later on to find the various state structs. When called from
3518 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3519 * taken (it is not needed) and helps on the read/write path with
3520 * respect to performance.
3521 */
3522 static nfsstat4
rfs4_get_state_lockit(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3523 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3524 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3525 {
3526 stateid_t *id = (stateid_t *)stateid;
3527 rfs4_state_t *sp;
3528
3529 *spp = NULL;
3530
3531 /* If we are booted as a cluster node, was stateid locally generated? */
3532 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3533 return (NFS4ERR_STALE_STATEID);
3534
3535 sp = rfs4_findstate(id, find_invalid, lock_fp);
3536 if (sp == NULL) {
3537 return (what_stateid_error(id, OPENID));
3538 }
3539
3540 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3541 if (lock_fp == TRUE)
3542 rfs4_state_rele(sp);
3543 else
3544 rfs4_state_rele_nounlock(sp);
3545 return (NFS4ERR_EXPIRED);
3546 }
3547
3548 *spp = sp;
3549
3550 return (NFS4_OK);
3551 }
3552
3553 nfsstat4
rfs4_get_state(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3554 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3555 rfs4_dbsearch_type_t find_invalid)
3556 {
3557 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3558 }
3559
3560 nfsstat4
rfs4_get_state_nolock(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3561 rfs4_get_state_nolock(stateid4 *stateid, rfs4_state_t **spp,
3562 rfs4_dbsearch_type_t find_invalid)
3563 {
3564 return (rfs4_get_state_lockit(stateid, spp, find_invalid, FALSE));
3565 }
3566
3567 int
rfs4_check_stateid_seqid(rfs4_state_t * sp,stateid4 * stateid,const compound_state_t * cs)3568 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid,
3569 const compound_state_t *cs)
3570 {
3571 stateid_t *id = (stateid_t *)stateid;
3572 bool_t has_session = rfs4_has_session(cs);
3573
3574 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3575 return (NFS4_CHECK_STATEID_EXPIRED);
3576
3577 if (has_session && id->bits.chgseq == 0)
3578 return (NFS4_CHECK_STATEID_OKAY);
3579
3580 /* Stateid is some time in the future - that's bad */
3581 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3582 return (NFS4_CHECK_STATEID_BAD);
3583
3584 if (!has_session &&
3585 sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1) {
3586 return (NFS4_CHECK_STATEID_REPLAY);
3587 }
3588
3589 /* Stateid is some time in the past - that's old */
3590 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3591 return (NFS4_CHECK_STATEID_OLD);
3592
3593 /* Caller needs to know about confirmation before closure */
3594 if (sp->rs_owner->ro_need_confirm)
3595 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3596
3597 if (sp->rs_closed == TRUE)
3598 return (NFS4_CHECK_STATEID_CLOSED);
3599
3600 return (NFS4_CHECK_STATEID_OKAY);
3601 }
3602
3603 int
rfs4_check_lo_stateid_seqid(rfs4_lo_state_t * lsp,stateid4 * stateid,const compound_state_t * cs)3604 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid,
3605 const compound_state_t *cs)
3606 {
3607 stateid_t *id = (stateid_t *)stateid;
3608 bool_t has_session = rfs4_has_session(cs);
3609
3610 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3611 return (NFS4_CHECK_STATEID_EXPIRED);
3612
3613 if (has_session && id->bits.chgseq == 0)
3614 return (NFS4_CHECK_STATEID_OKAY);
3615
3616 /* Stateid is some time in the future - that's bad */
3617 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3618 return (NFS4_CHECK_STATEID_BAD);
3619
3620 if (!has_session &&
3621 lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1) {
3622 return (NFS4_CHECK_STATEID_REPLAY);
3623 }
3624
3625 /* Stateid is some time in the past - that's old */
3626 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3627 return (NFS4_CHECK_STATEID_OLD);
3628
3629 if (lsp->rls_state->rs_closed == TRUE)
3630 return (NFS4_CHECK_STATEID_CLOSED);
3631
3632 return (NFS4_CHECK_STATEID_OKAY);
3633 }
3634
3635 nfsstat4
rfs4_get_deleg_state(stateid4 * stateid,rfs4_deleg_state_t ** dspp)3636 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3637 {
3638 stateid_t *id = (stateid_t *)stateid;
3639 rfs4_deleg_state_t *dsp;
3640
3641 *dspp = NULL;
3642
3643 /* If we are booted as a cluster node, was stateid locally generated? */
3644 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3645 return (NFS4ERR_STALE_STATEID);
3646
3647 dsp = rfs4_finddelegstate(id);
3648 if (dsp == NULL) {
3649 return (what_stateid_error(id, DELEGID));
3650 }
3651
3652 if (rfs4_lease_expired(dsp->rds_client)) {
3653 rfs4_deleg_state_rele(dsp);
3654 return (NFS4ERR_EXPIRED);
3655 }
3656
3657 *dspp = dsp;
3658
3659 return (NFS4_OK);
3660 }
3661
3662 nfsstat4
rfs4_get_lo_state(stateid4 * stateid,rfs4_lo_state_t ** lspp,bool_t lock_fp)3663 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3664 {
3665 stateid_t *id = (stateid_t *)stateid;
3666 rfs4_lo_state_t *lsp;
3667
3668 *lspp = NULL;
3669
3670 /* If we are booted as a cluster node, was stateid locally generated? */
3671 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3672 return (NFS4ERR_STALE_STATEID);
3673
3674 lsp = rfs4_findlo_state(id, lock_fp);
3675 if (lsp == NULL) {
3676 return (what_stateid_error(id, LOCKID));
3677 }
3678
3679 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3680 rfs4_lo_state_rele(lsp, lock_fp);
3681 return (NFS4ERR_EXPIRED);
3682 }
3683
3684 *lspp = lsp;
3685
3686 return (NFS4_OK);
3687 }
3688
3689 static nfsstat4
rfs4_get_all_state(stateid4 * sid,rfs4_state_t ** spp,rfs4_deleg_state_t ** dspp,rfs4_lo_state_t ** lspp)3690 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3691 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3692 {
3693 rfs4_state_t *sp = NULL;
3694 rfs4_deleg_state_t *dsp = NULL;
3695 rfs4_lo_state_t *lsp = NULL;
3696 stateid_t *id;
3697 nfsstat4 status;
3698
3699 *spp = NULL; *dspp = NULL; *lspp = NULL;
3700
3701 id = (stateid_t *)sid;
3702 switch (id->bits.type) {
3703 case OPENID:
3704 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3705 break;
3706 case DELEGID:
3707 status = rfs4_get_deleg_state(sid, &dsp);
3708 break;
3709 case LOCKID:
3710 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3711 if (status == NFS4_OK) {
3712 sp = lsp->rls_state;
3713 rfs4_dbe_hold(sp->rs_dbe);
3714 }
3715 break;
3716 default:
3717 status = NFS4ERR_BAD_STATEID;
3718 }
3719
3720 if (status == NFS4_OK) {
3721 *spp = sp;
3722 *dspp = dsp;
3723 *lspp = lsp;
3724 }
3725
3726 return (status);
3727 }
3728
3729 /*
3730 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3731 * rfs4_state_t struct has access to do this operation and if so
3732 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3733 */
3734 nfsstat4
rfs4_state_has_access(rfs4_state_t * sp,int mode,vnode_t * vp)3735 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3736 {
3737 nfsstat4 stat = NFS4_OK;
3738 rfs4_file_t *fp;
3739 bool_t create = FALSE;
3740
3741 rfs4_dbe_lock(sp->rs_dbe);
3742 if (mode == FWRITE) {
3743 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3744 stat = NFS4ERR_OPENMODE;
3745 }
3746 } else if (mode == FREAD) {
3747 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3748 /*
3749 * If we have OPENed the file with DENYing access
3750 * to both READ and WRITE then no one else could
3751 * have OPENed the file, hence no conflicting READ
3752 * deny. This check is merely an optimization.
3753 */
3754 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3755 goto out;
3756
3757 /* Check against file struct's DENY mode */
3758 fp = rfs4_findfile(vp, NULL, &create);
3759 if (fp != NULL) {
3760 int deny_read = 0;
3761 rfs4_dbe_lock(fp->rf_dbe);
3762 /*
3763 * Check if any other open owner has the file
3764 * OPENed with deny READ.
3765 */
3766 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3767 deny_read = 1;
3768 ASSERT(fp->rf_deny_read >= deny_read);
3769 if (fp->rf_deny_read > deny_read)
3770 stat = NFS4ERR_OPENMODE;
3771 rfs4_dbe_unlock(fp->rf_dbe);
3772 rfs4_file_rele(fp);
3773 }
3774 }
3775 } else {
3776 /* Illegal I/O mode */
3777 stat = NFS4ERR_INVAL;
3778 }
3779 out:
3780 rfs4_dbe_unlock(sp->rs_dbe);
3781 return (stat);
3782 }
3783
3784 static nfsstat4
check_state_seqid(stateid_t * st,stateid_t * in,bool_t has_session)3785 check_state_seqid(stateid_t *st, stateid_t *in, bool_t has_session)
3786 {
3787 /* rfc56661, section 8.2.2, "seqid to zero" */
3788 if (has_session && in->bits.chgseq == 0)
3789 return (NFS4_OK);
3790
3791 /* Seqid in the future? - that's bad */
3792 if (st->bits.chgseq < in->bits.chgseq)
3793 return (NFS4ERR_BAD_STATEID);
3794
3795 /* Seqid in the past? - that's old */
3796 if (st->bits.chgseq > in->bits.chgseq)
3797 return (NFS4ERR_OLD_STATEID);
3798
3799 return (NFS4_OK);
3800 }
3801
3802 /*
3803 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3804 * the file is being truncated, return NFS4_OK if allowed or appropriate
3805 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3806 * the associated file will be done if the I/O is not consistent with any
3807 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3808 * as reader or writer as appropriate. rfs4_op_open will acquire the
3809 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3810 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3811 * deleg parameter, we will return whether a write delegation is held by
3812 * the client associated with this stateid.
3813 * If the server instance associated with the relevant client is in its
3814 * grace period, return NFS4ERR_GRACE.
3815 */
3816
3817 nfsstat4
rfs4_check_stateid(int mode,vnode_t * vp,stateid4 * stateid,bool_t trunc,bool_t * deleg,bool_t do_access,caller_context_t * ct,compound_state_t * cs)3818 rfs4_check_stateid(int mode, vnode_t *vp,
3819 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3820 bool_t do_access, caller_context_t *ct, compound_state_t *cs)
3821 {
3822 rfs4_file_t *fp;
3823 bool_t create = FALSE;
3824 rfs4_state_t *sp;
3825 rfs4_deleg_state_t *dsp;
3826 rfs4_lo_state_t *lsp;
3827 stateid_t *id = (stateid_t *)stateid;
3828 nfsstat4 stat = NFS4_OK;
3829 bool_t use_ss = rfs4_has_session(cs);
3830
3831 if (ct != NULL) {
3832 ct->cc_sysid = 0;
3833 ct->cc_pid = 0;
3834 ct->cc_caller_id = nfs4_srv_caller_id;
3835 ct->cc_flags = CC_DONTBLOCK;
3836 }
3837
3838 if (ZERO_STATEID(stateid) || ONE_STATEID(stateid)) {
3839 fp = rfs4_findfile(vp, NULL, &create);
3840 if (fp == NULL)
3841 return (NFS4_OK);
3842
3843 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3844 rfs4_file_rele(fp);
3845 return (NFS4_OK);
3846 }
3847 if (mode == FWRITE ||
3848 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3849 rfs4_recall_deleg(fp, trunc, NULL);
3850 rfs4_file_rele(fp);
3851 return (NFS4ERR_DELAY);
3852 }
3853 rfs4_file_rele(fp);
3854 return (NFS4_OK);
3855 } else {
3856 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3857 if (stat != NFS4_OK)
3858 return (stat);
3859
3860 if (lsp != NULL) {
3861 /* Is associated server instance in its grace period? */
3862 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3863 rfs4_lo_state_rele(lsp, FALSE);
3864 if (sp != NULL)
3865 rfs4_state_rele_nounlock(sp);
3866 return (NFS4ERR_GRACE);
3867 }
3868
3869 ASSERT(id->bits.type == LOCKID);
3870 stat = check_state_seqid(&lsp->rls_lockid, id, use_ss);
3871 if (stat) {
3872 rfs4_lo_state_rele(lsp, FALSE);
3873 if (sp)
3874 rfs4_state_rele_nounlock(sp);
3875 return (stat);
3876 }
3877
3878 /* Ensure specified filehandle matches */
3879 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3880 rfs4_lo_state_rele(lsp, FALSE);
3881 if (sp != NULL)
3882 rfs4_state_rele_nounlock(sp);
3883 return (NFS4ERR_BAD_STATEID);
3884 }
3885
3886 if (ct != NULL) {
3887 ct->cc_sysid =
3888 lsp->rls_locker->rl_client->rc_sysidt;
3889 ct->cc_pid = lsp->rls_locker->rl_pid;
3890 }
3891 rfs4_lo_state_rele(lsp, FALSE);
3892 }
3893
3894 /* Stateid provided was an "open" stateid */
3895 if (sp != NULL) {
3896 /* Is associated server instance in its grace period? */
3897 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3898 rfs4_state_rele_nounlock(sp);
3899 return (NFS4ERR_GRACE);
3900 }
3901 /* Skip if is here via the LOCKID */
3902 if (id->bits.type == OPENID) {
3903 stat = check_state_seqid(&sp->rs_stateid, id,
3904 use_ss);
3905 if (stat) {
3906 rfs4_state_rele_nounlock(sp);
3907 return (stat);
3908 }
3909 }
3910 /* Ensure specified filehandle matches */
3911 if (sp->rs_finfo->rf_vp != vp) {
3912 rfs4_state_rele_nounlock(sp);
3913 return (NFS4ERR_BAD_STATEID);
3914 }
3915
3916 if (sp->rs_owner->ro_need_confirm) {
3917 rfs4_state_rele_nounlock(sp);
3918 return (NFS4ERR_BAD_STATEID);
3919 }
3920
3921 if (sp->rs_closed == TRUE) {
3922 rfs4_state_rele_nounlock(sp);
3923 return (NFS4ERR_OLD_STATEID);
3924 }
3925
3926 if (do_access)
3927 stat = rfs4_state_has_access(sp, mode, vp);
3928 else
3929 stat = NFS4_OK;
3930
3931 /*
3932 * Return whether this state has write
3933 * delegation if desired
3934 */
3935 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3936 OPEN_DELEGATE_WRITE))
3937 *deleg = TRUE;
3938
3939 /*
3940 * We got a valid stateid, so we update the
3941 * lease on the client. Ideally we would like
3942 * to do this after the calling op succeeds,
3943 * but for now this will be good
3944 * enough. Callers of this routine are
3945 * currently insulated from the state stuff.
3946 */
3947 rfs4_update_lease(sp->rs_owner->ro_client);
3948
3949 /*
3950 * If a delegation is present on this file and
3951 * this is a WRITE, then update the lastwrite
3952 * time to indicate that activity is present.
3953 */
3954 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3955 OPEN_DELEGATE_WRITE &&
3956 mode == FWRITE) {
3957 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3958 gethrestime_sec();
3959 }
3960
3961 /* Fill context for possible nbmand check */
3962 if (ct != NULL && ct->cc_pid == 0) {
3963 ct->cc_sysid =
3964 sp->rs_owner->ro_client->rc_sysidt;
3965 ct->cc_pid =
3966 rfs4_dbe_getid(sp->rs_owner->ro_dbe);
3967 }
3968
3969 rfs4_state_rele_nounlock(sp);
3970
3971 return (stat);
3972 }
3973
3974 if (dsp != NULL) {
3975 /* Is associated server instance in its grace period? */
3976 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3977 rfs4_deleg_state_rele(dsp);
3978 return (NFS4ERR_GRACE);
3979 }
3980
3981 stat = check_state_seqid(&dsp->rds_delegid, id, use_ss);
3982 if (stat) {
3983 rfs4_deleg_state_rele(dsp);
3984 return (stat);
3985 }
3986
3987 /* Ensure specified filehandle matches */
3988 if (dsp->rds_finfo->rf_vp != vp) {
3989 rfs4_deleg_state_rele(dsp);
3990 return (NFS4ERR_BAD_STATEID);
3991 }
3992 /*
3993 * Return whether this state has write
3994 * delegation if desired
3995 */
3996 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3997 OPEN_DELEGATE_WRITE))
3998 *deleg = TRUE;
3999
4000 rfs4_update_lease(dsp->rds_client);
4001
4002 /*
4003 * If a delegation is present on this file and
4004 * this is a WRITE, then update the lastwrite
4005 * time to indicate that activity is present.
4006 */
4007 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
4008 OPEN_DELEGATE_WRITE && mode == FWRITE) {
4009 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
4010 gethrestime_sec();
4011 }
4012
4013 /*
4014 * XXX - what happens if this is a WRITE and the
4015 * delegation type of for READ.
4016 */
4017 rfs4_deleg_state_rele(dsp);
4018
4019 return (stat);
4020 }
4021 /*
4022 * If we got this far, something bad happened
4023 */
4024 return (NFS4ERR_BAD_STATEID);
4025 }
4026 }
4027
4028
4029 /*
4030 * This is a special function in that for the file struct provided the
4031 * server wants to remove/close all current state associated with the
4032 * file. The prime use of this would be with OP_REMOVE to force the
4033 * release of state and particularly of file locks.
4034 *
4035 * There is an assumption that there is no delegations outstanding on
4036 * this file at this point. The caller should have waited for those
4037 * to be returned or revoked.
4038 */
4039 void
rfs4_close_all_state(rfs4_file_t * fp)4040 rfs4_close_all_state(rfs4_file_t *fp)
4041 {
4042 rfs4_state_t *sp;
4043
4044 rfs4_dbe_lock(fp->rf_dbe);
4045
4046 #ifdef DEBUG
4047 /* only applies when server is handing out delegations */
4048 if (nfs4_get_deleg_policy() != SRV_NEVER_DELEGATE)
4049 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
4050 #endif
4051
4052 /* No delegations for this file */
4053 ASSERT(list_is_empty(&fp->rf_delegstatelist));
4054
4055 /* Make sure that it can not be found */
4056 rfs4_dbe_invalidate(fp->rf_dbe);
4057
4058 if (fp->rf_vp == NULL) {
4059 rfs4_dbe_unlock(fp->rf_dbe);
4060 return;
4061 }
4062 rfs4_dbe_unlock(fp->rf_dbe);
4063
4064 /*
4065 * Hold as writer to prevent other server threads from
4066 * processing requests related to the file while all state is
4067 * being removed.
4068 */
4069 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
4070
4071 /* Remove ALL state from the file */
4072 while ((sp = rfs4_findstate_by_file(fp)) != NULL) {
4073 rfs4_state_close(sp, FALSE, FALSE, CRED());
4074 rfs4_state_rele_nounlock(sp);
4075 }
4076
4077 /*
4078 * This is only safe since there are no further references to
4079 * the file.
4080 */
4081 rfs4_dbe_lock(fp->rf_dbe);
4082 if (fp->rf_vp) {
4083 vnode_t *vp = fp->rf_vp;
4084
4085 mutex_enter(&vp->v_vsd_lock);
4086 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4087 mutex_exit(&vp->v_vsd_lock);
4088 VN_RELE(vp);
4089 fp->rf_vp = NULL;
4090 }
4091 rfs4_dbe_unlock(fp->rf_dbe);
4092
4093 /* Finally let other references to proceed */
4094 rw_exit(&fp->rf_file_rwlock);
4095 }
4096
4097 /*
4098 * This function is used as a target for the rfs4_dbe_walk() call
4099 * below. The purpose of this function is to see if the
4100 * lockowner_state refers to a file that resides within the exportinfo
4101 * export. If so, then remove the lock_owner state (file locks and
4102 * share "locks") for this object since the intent is the server is
4103 * unexporting the specified directory. Be sure to invalidate the
4104 * object after the state has been released
4105 */
4106 static void
rfs4_lo_state_walk_callout(rfs4_entry_t u_entry,void * e)4107 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
4108 {
4109 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
4110 struct exportinfo *exi = (struct exportinfo *)e;
4111 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4112 fhandle_t *efhp;
4113
4114 efhp = (fhandle_t *)&exi->exi_fh;
4115 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4116
4117 FH_TO_FMT4(efhp, exi_fhp);
4118
4119 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
4120 rf_filehandle.nfs_fh4_val;
4121
4122 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4123 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4124 exi_fhp->fh4_xlen) == 0) {
4125 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
4126 rfs4_dbe_invalidate(lsp->rls_dbe);
4127 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
4128 }
4129 }
4130
4131 /*
4132 * This function is used as a target for the rfs4_dbe_walk() call
4133 * below. The purpose of this function is to see if the state refers
4134 * to a file that resides within the exportinfo export. If so, then
4135 * remove the open state for this object since the intent is the
4136 * server is unexporting the specified directory. The main result for
4137 * this type of entry is to invalidate it such it will not be found in
4138 * the future.
4139 */
4140 static void
rfs4_state_walk_callout(rfs4_entry_t u_entry,void * e)4141 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
4142 {
4143 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
4144 struct exportinfo *exi = (struct exportinfo *)e;
4145 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4146 fhandle_t *efhp;
4147
4148 efhp = (fhandle_t *)&exi->exi_fh;
4149 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4150
4151 FH_TO_FMT4(efhp, exi_fhp);
4152
4153 finfo_fhp =
4154 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
4155
4156 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4157 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4158 exi_fhp->fh4_xlen) == 0) {
4159 rfs4_state_close(sp, TRUE, FALSE, CRED());
4160 rfs4_dbe_invalidate(sp->rs_dbe);
4161 }
4162 }
4163
4164 /*
4165 * This function is used as a target for the rfs4_dbe_walk() call
4166 * below. The purpose of this function is to see if the state refers
4167 * to a file that resides within the exportinfo export. If so, then
4168 * remove the deleg state for this object since the intent is the
4169 * server is unexporting the specified directory. The main result for
4170 * this type of entry is to invalidate it such it will not be found in
4171 * the future.
4172 */
4173 static void
rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry,void * e)4174 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
4175 {
4176 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
4177 struct exportinfo *exi = (struct exportinfo *)e;
4178 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4179 fhandle_t *efhp;
4180
4181 efhp = (fhandle_t *)&exi->exi_fh;
4182 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4183
4184 FH_TO_FMT4(efhp, exi_fhp);
4185
4186 finfo_fhp =
4187 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
4188
4189 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4190 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4191 exi_fhp->fh4_xlen) == 0) {
4192 rfs4_dbe_invalidate(dsp->rds_dbe);
4193 }
4194 }
4195
4196 /*
4197 * This function is used as a target for the rfs4_dbe_walk() call
4198 * below. The purpose of this function is to see if the state refers
4199 * to a file that resides within the exportinfo export. If so, then
4200 * release vnode hold for this object since the intent is the server
4201 * is unexporting the specified directory. Invalidation will prevent
4202 * this struct from being found in the future.
4203 */
4204 static void
rfs4_file_walk_callout(rfs4_entry_t u_entry,void * e)4205 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
4206 {
4207 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
4208 struct exportinfo *exi = (struct exportinfo *)e;
4209 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
4210 fhandle_t *efhp;
4211
4212 efhp = (fhandle_t *)&exi->exi_fh;
4213 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
4214
4215 FH_TO_FMT4(efhp, exi_fhp);
4216
4217 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
4218
4219 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
4220 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
4221 exi_fhp->fh4_xlen) == 0) {
4222 if (fp->rf_vp) {
4223 vnode_t *vp = fp->rf_vp;
4224
4225 /*
4226 * don't leak monitors and remove the reference
4227 * put on the vnode when the delegation was granted.
4228 */
4229 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
4230 (void) fem_uninstall(vp, deleg_rdops,
4231 (void *)fp);
4232 vn_open_downgrade(vp, FREAD);
4233 } else if (fp->rf_dinfo.rd_dtype ==
4234 OPEN_DELEGATE_WRITE) {
4235 (void) fem_uninstall(vp, deleg_wrops,
4236 (void *)fp);
4237 vn_open_downgrade(vp, FREAD|FWRITE);
4238 }
4239 mutex_enter(&vp->v_vsd_lock);
4240 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
4241 mutex_exit(&vp->v_vsd_lock);
4242 VN_RELE(vp);
4243 fp->rf_vp = NULL;
4244 }
4245 rfs4_dbe_invalidate(fp->rf_dbe);
4246 }
4247 }
4248
4249 /*
4250 * Given a directory that is being unexported, cleanup/release all
4251 * state in the server that refers to objects residing underneath this
4252 * particular export. The ordering of the release is important.
4253 * Lock_owner, then state and then file.
4254 *
4255 * NFS zones note: nfs_export.c:unexport() calls this from a
4256 * thread in the global zone for NGZ data structures, so we
4257 * CANNOT use zone_getspecific anywhere in this code path.
4258 */
4259 void
rfs4_clean_state_exi(nfs_export_t * ne,struct exportinfo * exi)4260 rfs4_clean_state_exi(nfs_export_t *ne, struct exportinfo *exi)
4261 {
4262 nfs_globals_t *ng;
4263 nfs4_srv_t *nsrv4;
4264
4265 ng = ne->ne_globals;
4266 ASSERT(ng->nfs_zoneid == exi->exi_zoneid);
4267 nsrv4 = ng->nfs4_srv;
4268
4269 mutex_enter(&nsrv4->state_lock);
4270
4271 if (nsrv4->nfs4_server_state == NULL) {
4272 mutex_exit(&nsrv4->state_lock);
4273 return;
4274 }
4275
4276 rfs4_dbe_walk(nsrv4->rfs4_lo_state_tab,
4277 rfs4_lo_state_walk_callout, exi);
4278 rfs4_dbe_walk(nsrv4->rfs4_state_tab, rfs4_state_walk_callout, exi);
4279 rfs4_dbe_walk(nsrv4->rfs4_deleg_state_tab,
4280 rfs4_deleg_state_walk_callout, exi);
4281 rfs4_dbe_walk(nsrv4->rfs4_file_tab, rfs4_file_walk_callout, exi);
4282
4283 mutex_exit(&nsrv4->state_lock);
4284 }
4285