1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2014 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 #include <sys/systm.h>
27 #include <sys/kmem.h>
28 #include <sys/cmn_err.h>
29 #include <sys/atomic.h>
30 #include <sys/clconf.h>
31 #include <sys/cladm.h>
32 #include <sys/flock.h>
33 #include <nfs/export.h>
34 #include <nfs/nfs.h>
35 #include <nfs/nfs4.h>
36 #include <nfs/nfssys.h>
37 #include <nfs/lm.h>
38 #include <sys/pathname.h>
39 #include <sys/sdt.h>
40 #include <sys/nvpair.h>
41
42 extern u_longlong_t nfs4_srv_caller_id;
43
44 extern time_t rfs4_start_time;
45 extern uint_t nfs4_srv_vkey;
46
47 stateid4 special0 = {
48 0,
49 { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
50 };
51
52 stateid4 special1 = {
53 0xffffffff,
54 {
55 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
56 (char)0xff, (char)0xff, (char)0xff, (char)0xff,
57 (char)0xff, (char)0xff, (char)0xff, (char)0xff
58 }
59 };
60
61
62 #define ISSPECIAL(id) (stateid4_cmp(id, &special0) || \
63 stateid4_cmp(id, &special1))
64
65 /* For embedding the cluster nodeid into our clientid */
66 #define CLUSTER_NODEID_SHIFT 24
67 #define CLUSTER_MAX_NODEID 255
68
69 #ifdef DEBUG
70 int rfs4_debug;
71 #endif
72
73 static uint32_t rfs4_database_debug = 0x00;
74
75 static void rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf);
76 static void rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dir, char *leaf);
77 static void rfs4_dss_clear_oldstate(rfs4_servinst_t *sip);
78 static void rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip);
79
80 /*
81 * Couple of simple init/destroy functions for a general waiter
82 */
83 void
rfs4_sw_init(rfs4_state_wait_t * swp)84 rfs4_sw_init(rfs4_state_wait_t *swp)
85 {
86 mutex_init(swp->sw_cv_lock, NULL, MUTEX_DEFAULT, NULL);
87 cv_init(swp->sw_cv, NULL, CV_DEFAULT, NULL);
88 swp->sw_active = FALSE;
89 swp->sw_wait_count = 0;
90 }
91
92 void
rfs4_sw_destroy(rfs4_state_wait_t * swp)93 rfs4_sw_destroy(rfs4_state_wait_t *swp)
94 {
95 mutex_destroy(swp->sw_cv_lock);
96 cv_destroy(swp->sw_cv);
97 }
98
99 void
rfs4_sw_enter(rfs4_state_wait_t * swp)100 rfs4_sw_enter(rfs4_state_wait_t *swp)
101 {
102 mutex_enter(swp->sw_cv_lock);
103 while (swp->sw_active) {
104 swp->sw_wait_count++;
105 cv_wait(swp->sw_cv, swp->sw_cv_lock);
106 swp->sw_wait_count--;
107 }
108 ASSERT(swp->sw_active == FALSE);
109 swp->sw_active = TRUE;
110 mutex_exit(swp->sw_cv_lock);
111 }
112
113 void
rfs4_sw_exit(rfs4_state_wait_t * swp)114 rfs4_sw_exit(rfs4_state_wait_t *swp)
115 {
116 mutex_enter(swp->sw_cv_lock);
117 ASSERT(swp->sw_active == TRUE);
118 swp->sw_active = FALSE;
119 if (swp->sw_wait_count != 0)
120 cv_broadcast(swp->sw_cv);
121 mutex_exit(swp->sw_cv_lock);
122 }
123
124 /*
125 * CPR callback id -- not related to v4 callbacks
126 */
127 static callb_id_t cpr_id = 0;
128
129 static void
deep_lock_copy(LOCK4res * dres,LOCK4res * sres)130 deep_lock_copy(LOCK4res *dres, LOCK4res *sres)
131 {
132 lock_owner4 *slo = &sres->LOCK4res_u.denied.owner;
133 lock_owner4 *dlo = &dres->LOCK4res_u.denied.owner;
134
135 if (sres->status == NFS4ERR_DENIED) {
136 dlo->owner_val = kmem_alloc(slo->owner_len, KM_SLEEP);
137 bcopy(slo->owner_val, dlo->owner_val, slo->owner_len);
138 }
139 }
140
141 static void
deep_lock_free(LOCK4res * res)142 deep_lock_free(LOCK4res *res)
143 {
144 lock_owner4 *lo = &res->LOCK4res_u.denied.owner;
145
146 if (res->status == NFS4ERR_DENIED)
147 kmem_free(lo->owner_val, lo->owner_len);
148 }
149
150 static void
deep_open_copy(OPEN4res * dres,OPEN4res * sres)151 deep_open_copy(OPEN4res *dres, OPEN4res *sres)
152 {
153 nfsace4 *sacep, *dacep;
154
155 if (sres->status != NFS4_OK) {
156 return;
157 }
158
159 dres->attrset = sres->attrset;
160
161 switch (sres->delegation.delegation_type) {
162 case OPEN_DELEGATE_NONE:
163 return;
164 case OPEN_DELEGATE_READ:
165 sacep = &sres->delegation.open_delegation4_u.read.permissions;
166 dacep = &dres->delegation.open_delegation4_u.read.permissions;
167 break;
168 case OPEN_DELEGATE_WRITE:
169 sacep = &sres->delegation.open_delegation4_u.write.permissions;
170 dacep = &dres->delegation.open_delegation4_u.write.permissions;
171 break;
172 }
173 dacep->who.utf8string_val =
174 kmem_alloc(sacep->who.utf8string_len, KM_SLEEP);
175 bcopy(sacep->who.utf8string_val, dacep->who.utf8string_val,
176 sacep->who.utf8string_len);
177 }
178
179 static void
deep_open_free(OPEN4res * res)180 deep_open_free(OPEN4res *res)
181 {
182 nfsace4 *acep;
183 if (res->status != NFS4_OK)
184 return;
185
186 switch (res->delegation.delegation_type) {
187 case OPEN_DELEGATE_NONE:
188 return;
189 case OPEN_DELEGATE_READ:
190 acep = &res->delegation.open_delegation4_u.read.permissions;
191 break;
192 case OPEN_DELEGATE_WRITE:
193 acep = &res->delegation.open_delegation4_u.write.permissions;
194 break;
195 }
196
197 if (acep->who.utf8string_val) {
198 kmem_free(acep->who.utf8string_val, acep->who.utf8string_len);
199 acep->who.utf8string_val = NULL;
200 }
201 }
202
203 void
rfs4_free_reply(nfs_resop4 * rp)204 rfs4_free_reply(nfs_resop4 *rp)
205 {
206 switch (rp->resop) {
207 case OP_LOCK:
208 deep_lock_free(&rp->nfs_resop4_u.oplock);
209 break;
210 case OP_OPEN:
211 deep_open_free(&rp->nfs_resop4_u.opopen);
212 default:
213 break;
214 }
215 }
216
217 void
rfs4_copy_reply(nfs_resop4 * dst,nfs_resop4 * src)218 rfs4_copy_reply(nfs_resop4 *dst, nfs_resop4 *src)
219 {
220 *dst = *src;
221
222 /* Handle responses that need deep copy */
223 switch (src->resop) {
224 case OP_LOCK:
225 deep_lock_copy(&dst->nfs_resop4_u.oplock,
226 &src->nfs_resop4_u.oplock);
227 break;
228 case OP_OPEN:
229 deep_open_copy(&dst->nfs_resop4_u.opopen,
230 &src->nfs_resop4_u.opopen);
231 break;
232 default:
233 break;
234 };
235 }
236
237 /*
238 * This is the implementation of the underlying state engine. The
239 * public interface to this engine is described by
240 * nfs4_state.h. Callers to the engine should hold no state engine
241 * locks when they call in to it. If the protocol needs to lock data
242 * structures it should do so after acquiring all references to them
243 * first and then follow the following lock order:
244 *
245 * client > openowner > state > lo_state > lockowner > file.
246 *
247 * Internally we only allow a thread to hold one hash bucket lock at a
248 * time and the lock is higher in the lock order (must be acquired
249 * first) than the data structure that is on that hash list.
250 *
251 * If a new reference was acquired by the caller, that reference needs
252 * to be released after releasing all acquired locks with the
253 * corresponding rfs4_*_rele routine.
254 */
255
/*
 * This code is somewhat prototypical for now. Its purpose currently is to
 * implement the interfaces sufficiently to finish the higher protocol
 * elements. This will be replaced by dynamically resizeable tables
 * backed by the kmem_cache allocator. However synchronization is handled
 * correctly (I hope) and will not change by much. The mutexes for
 * the hash buckets that can be used to create new instances of data
 * structures might be good candidates to evolve into reader writer
 * locks. If it has to do a creation, it would be holding the
 * mutex across a kmem_alloc with KM_SLEEP specified.
 */
267
268 #ifdef DEBUG
269 #define TABSIZE 17
270 #else
271 #define TABSIZE 2047
272 #endif
273
274 #define ADDRHASH(key) ((unsigned long)(key) >> 3)
275
276 /* Used to serialize create/destroy of rfs4_server_state database */
277 kmutex_t rfs4_state_lock;
278 static rfs4_database_t *rfs4_server_state = NULL;
279
280 /* Used to serialize lookups of clientids */
281 static krwlock_t rfs4_findclient_lock;
282
283 /*
284 * For now this "table" is exposed so that the CPR callback
285 * function can tromp through it..
286 */
287 rfs4_table_t *rfs4_client_tab;
288
289 static rfs4_index_t *rfs4_clientid_idx;
290 static rfs4_index_t *rfs4_nfsclnt_idx;
291 static rfs4_table_t *rfs4_clntip_tab;
292 static rfs4_index_t *rfs4_clntip_idx;
293 static rfs4_table_t *rfs4_openowner_tab;
294 static rfs4_index_t *rfs4_openowner_idx;
295 static rfs4_table_t *rfs4_state_tab;
296 static rfs4_index_t *rfs4_state_idx;
297 static rfs4_index_t *rfs4_state_owner_file_idx;
298 static rfs4_index_t *rfs4_state_file_idx;
299 static rfs4_table_t *rfs4_lo_state_tab;
300 static rfs4_index_t *rfs4_lo_state_idx;
301 static rfs4_index_t *rfs4_lo_state_owner_idx;
302 static rfs4_table_t *rfs4_lockowner_tab;
303 static rfs4_index_t *rfs4_lockowner_idx;
304 static rfs4_index_t *rfs4_lockowner_pid_idx;
305 static rfs4_table_t *rfs4_file_tab;
306 static rfs4_index_t *rfs4_file_idx;
307 static rfs4_table_t *rfs4_deleg_state_tab;
308 static rfs4_index_t *rfs4_deleg_idx;
309 static rfs4_index_t *rfs4_deleg_state_idx;
310
/*
 * 1M.  The product is parenthesized so that it stays a single operand
 * wherever the macro is expanded (e.g. "x / MAXTABSZ", "x % MAXTABSZ").
 */
#define	MAXTABSZ	(1024 * 1024)
312
/* The values below are rfs4_lease_time units */

#ifndef DEBUG
#define	CLIENT_CACHE_TIME	10
#define	OPENOWNER_CACHE_TIME	5
#define	STATE_CACHE_TIME	1
#define	LO_STATE_CACHE_TIME	1
#define	LOCKOWNER_CACHE_TIME	3
#define	FILE_CACHE_TIME		40
#define	DELEG_STATE_CACHE_TIME	1
#else
#define	CLIENT_CACHE_TIME	1
#define	OPENOWNER_CACHE_TIME	1
#define	STATE_CACHE_TIME	1
#define	LO_STATE_CACHE_TIME	1
#define	LOCKOWNER_CACHE_TIME	1
#define	FILE_CACHE_TIME		3
#define	DELEG_STATE_CACHE_TIME	1
#endif

/* One cache time per kind of entry; all of them start out as zero. */
static time_t rfs4_client_cache_time;
static time_t rfs4_clntip_cache_time;
static time_t rfs4_openowner_cache_time;
static time_t rfs4_state_cache_time;
static time_t rfs4_lo_state_cache_time;
static time_t rfs4_lockowner_cache_time;
static time_t rfs4_file_cache_time;
static time_t rfs4_deleg_state_cache_time;
342
343 static bool_t rfs4_client_create(rfs4_entry_t, void *);
344 static void rfs4_dss_remove_cpleaf(rfs4_client_t *);
345 static void rfs4_dss_remove_leaf(rfs4_servinst_t *, char *, char *);
346 static void rfs4_client_destroy(rfs4_entry_t);
347 static bool_t rfs4_client_expiry(rfs4_entry_t);
348 static uint32_t clientid_hash(void *);
349 static bool_t clientid_compare(rfs4_entry_t, void *);
350 static void *clientid_mkkey(rfs4_entry_t);
351 static uint32_t nfsclnt_hash(void *);
352 static bool_t nfsclnt_compare(rfs4_entry_t, void *);
353 static void *nfsclnt_mkkey(rfs4_entry_t);
354 static bool_t rfs4_clntip_expiry(rfs4_entry_t);
355 static void rfs4_clntip_destroy(rfs4_entry_t);
356 static bool_t rfs4_clntip_create(rfs4_entry_t, void *);
357 static uint32_t clntip_hash(void *);
358 static bool_t clntip_compare(rfs4_entry_t, void *);
359 static void *clntip_mkkey(rfs4_entry_t);
360 static bool_t rfs4_openowner_create(rfs4_entry_t, void *);
361 static void rfs4_openowner_destroy(rfs4_entry_t);
362 static bool_t rfs4_openowner_expiry(rfs4_entry_t);
363 static uint32_t openowner_hash(void *);
364 static bool_t openowner_compare(rfs4_entry_t, void *);
365 static void *openowner_mkkey(rfs4_entry_t);
366 static bool_t rfs4_state_create(rfs4_entry_t, void *);
367 static void rfs4_state_destroy(rfs4_entry_t);
368 static bool_t rfs4_state_expiry(rfs4_entry_t);
369 static uint32_t state_hash(void *);
370 static bool_t state_compare(rfs4_entry_t, void *);
371 static void *state_mkkey(rfs4_entry_t);
372 static uint32_t state_owner_file_hash(void *);
373 static bool_t state_owner_file_compare(rfs4_entry_t, void *);
374 static void *state_owner_file_mkkey(rfs4_entry_t);
375 static uint32_t state_file_hash(void *);
376 static bool_t state_file_compare(rfs4_entry_t, void *);
377 static void *state_file_mkkey(rfs4_entry_t);
378 static bool_t rfs4_lo_state_create(rfs4_entry_t, void *);
379 static void rfs4_lo_state_destroy(rfs4_entry_t);
380 static bool_t rfs4_lo_state_expiry(rfs4_entry_t);
381 static uint32_t lo_state_hash(void *);
382 static bool_t lo_state_compare(rfs4_entry_t, void *);
383 static void *lo_state_mkkey(rfs4_entry_t);
384 static uint32_t lo_state_lo_hash(void *);
385 static bool_t lo_state_lo_compare(rfs4_entry_t, void *);
386 static void *lo_state_lo_mkkey(rfs4_entry_t);
387 static bool_t rfs4_lockowner_create(rfs4_entry_t, void *);
388 static void rfs4_lockowner_destroy(rfs4_entry_t);
389 static bool_t rfs4_lockowner_expiry(rfs4_entry_t);
390 static uint32_t lockowner_hash(void *);
391 static bool_t lockowner_compare(rfs4_entry_t, void *);
392 static void *lockowner_mkkey(rfs4_entry_t);
393 static uint32_t pid_hash(void *);
394 static bool_t pid_compare(rfs4_entry_t, void *);
395 static void *pid_mkkey(rfs4_entry_t);
396 static bool_t rfs4_file_create(rfs4_entry_t, void *);
397 static void rfs4_file_destroy(rfs4_entry_t);
398 static uint32_t file_hash(void *);
399 static bool_t file_compare(rfs4_entry_t, void *);
400 static void *file_mkkey(rfs4_entry_t);
401 static bool_t rfs4_deleg_state_create(rfs4_entry_t, void *);
402 static void rfs4_deleg_state_destroy(rfs4_entry_t);
403 static bool_t rfs4_deleg_state_expiry(rfs4_entry_t);
404 static uint32_t deleg_hash(void *);
405 static bool_t deleg_compare(rfs4_entry_t, void *);
406 static void *deleg_mkkey(rfs4_entry_t);
407 static uint32_t deleg_state_hash(void *);
408 static bool_t deleg_state_compare(rfs4_entry_t, void *);
409 static void *deleg_state_mkkey(rfs4_entry_t);
410
411 static void rfs4_state_rele_nounlock(rfs4_state_t *);
412
413 static int rfs4_ss_enabled = 0;
414
415 extern void (*rfs4_client_clrst)(struct nfs4clrst_args *);
416
417 void
rfs4_ss_pnfree(rfs4_ss_pn_t * ss_pn)418 rfs4_ss_pnfree(rfs4_ss_pn_t *ss_pn)
419 {
420 kmem_free(ss_pn, sizeof (rfs4_ss_pn_t));
421 }
422
423 static rfs4_ss_pn_t *
rfs4_ss_pnalloc(char * dir,char * leaf)424 rfs4_ss_pnalloc(char *dir, char *leaf)
425 {
426 rfs4_ss_pn_t *ss_pn;
427 int dir_len, leaf_len;
428
429 /*
430 * validate we have a resonable path
431 * (account for the '/' and trailing null)
432 */
433 if ((dir_len = strlen(dir)) > MAXPATHLEN ||
434 (leaf_len = strlen(leaf)) > MAXNAMELEN ||
435 (dir_len + leaf_len + 2) > MAXPATHLEN) {
436 return (NULL);
437 }
438
439 ss_pn = kmem_alloc(sizeof (rfs4_ss_pn_t), KM_SLEEP);
440
441 (void) snprintf(ss_pn->pn, MAXPATHLEN, "%s/%s", dir, leaf);
442 /* Handy pointer to just the leaf name */
443 ss_pn->leaf = ss_pn->pn + dir_len + 1;
444 return (ss_pn);
445 }
446
447
448 /*
449 * Move the "leaf" filename from "sdir" directory
450 * to the "ddir" directory. Return the pathname of
451 * the destination unless the rename fails in which
452 * case we need to return the source pathname.
453 */
454 static rfs4_ss_pn_t *
rfs4_ss_movestate(char * sdir,char * ddir,char * leaf)455 rfs4_ss_movestate(char *sdir, char *ddir, char *leaf)
456 {
457 rfs4_ss_pn_t *src, *dst;
458
459 if ((src = rfs4_ss_pnalloc(sdir, leaf)) == NULL)
460 return (NULL);
461
462 if ((dst = rfs4_ss_pnalloc(ddir, leaf)) == NULL) {
463 rfs4_ss_pnfree(src);
464 return (NULL);
465 }
466
467 /*
468 * If the rename fails we shall return the src
469 * pathname and free the dst. Otherwise we need
470 * to free the src and return the dst pathanme.
471 */
472 if (vn_rename(src->pn, dst->pn, UIO_SYSSPACE)) {
473 rfs4_ss_pnfree(dst);
474 return (src);
475 }
476 rfs4_ss_pnfree(src);
477 return (dst);
478 }
479
480
481 static rfs4_oldstate_t *
rfs4_ss_getstate(vnode_t * dvp,rfs4_ss_pn_t * ss_pn)482 rfs4_ss_getstate(vnode_t *dvp, rfs4_ss_pn_t *ss_pn)
483 {
484 struct uio uio;
485 struct iovec iov[3];
486
487 rfs4_oldstate_t *cl_ss = NULL;
488 vnode_t *vp;
489 vattr_t va;
490 uint_t id_len;
491 int err, kill_file, file_vers;
492
493 if (ss_pn == NULL)
494 return (NULL);
495
496 /*
497 * open the state file.
498 */
499 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FREAD, 0, &vp, 0, 0) != 0) {
500 return (NULL);
501 }
502
503 if (vp->v_type != VREG) {
504 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
505 VN_RELE(vp);
506 return (NULL);
507 }
508
509 err = VOP_ACCESS(vp, VREAD, 0, CRED(), NULL);
510 if (err) {
511 /*
512 * We don't have read access? better get the heck out.
513 */
514 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
515 VN_RELE(vp);
516 return (NULL);
517 }
518
519 (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
520 /*
521 * get the file size to do some basic validation
522 */
523 va.va_mask = AT_SIZE;
524 err = VOP_GETATTR(vp, &va, 0, CRED(), NULL);
525
526 kill_file = (va.va_size == 0 || va.va_size <
527 (NFS4_VERIFIER_SIZE + sizeof (uint_t)+1));
528
529 if (err || kill_file) {
530 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
531 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
532 VN_RELE(vp);
533 if (kill_file) {
534 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
535 }
536 return (NULL);
537 }
538
539 cl_ss = kmem_alloc(sizeof (rfs4_oldstate_t), KM_SLEEP);
540
541 /*
542 * build iovecs to read in the file_version, verifier and id_len
543 */
544 iov[0].iov_base = (caddr_t)&file_vers;
545 iov[0].iov_len = sizeof (int);
546 iov[1].iov_base = (caddr_t)&cl_ss->cl_id4.verifier;
547 iov[1].iov_len = NFS4_VERIFIER_SIZE;
548 iov[2].iov_base = (caddr_t)&id_len;
549 iov[2].iov_len = sizeof (uint_t);
550
551 uio.uio_iov = iov;
552 uio.uio_iovcnt = 3;
553 uio.uio_segflg = UIO_SYSSPACE;
554 uio.uio_loffset = 0;
555 uio.uio_resid = sizeof (int) + NFS4_VERIFIER_SIZE + sizeof (uint_t);
556
557 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
558 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
559 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
560 VN_RELE(vp);
561 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
562 return (NULL);
563 }
564
565 /*
566 * if the file_version doesn't match or if the
567 * id_len is zero or the combination of the verifier,
568 * id_len and id_val is bigger than the file we have
569 * a problem. If so ditch the file.
570 */
571 kill_file = (file_vers != NFS4_SS_VERSION || id_len == 0 ||
572 (id_len + NFS4_VERIFIER_SIZE + sizeof (uint_t)) > va.va_size);
573
574 if (err || kill_file) {
575 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
576 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
577 VN_RELE(vp);
578 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
579 if (kill_file) {
580 (void) VOP_REMOVE(dvp, ss_pn->leaf, CRED(), NULL, 0);
581 }
582 return (NULL);
583 }
584
585 /*
586 * now get the client id value
587 */
588 cl_ss->cl_id4.id_val = kmem_alloc(id_len, KM_SLEEP);
589 iov[0].iov_base = cl_ss->cl_id4.id_val;
590 iov[0].iov_len = id_len;
591
592 uio.uio_iov = iov;
593 uio.uio_iovcnt = 1;
594 uio.uio_segflg = UIO_SYSSPACE;
595 uio.uio_resid = cl_ss->cl_id4.id_len = id_len;
596
597 if (err = VOP_READ(vp, &uio, FREAD, CRED(), NULL)) {
598 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
599 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
600 VN_RELE(vp);
601 kmem_free(cl_ss->cl_id4.id_val, id_len);
602 kmem_free(cl_ss, sizeof (rfs4_oldstate_t));
603 return (NULL);
604 }
605
606 VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
607 (void) VOP_CLOSE(vp, FREAD, 1, (offset_t)0, CRED(), NULL);
608 VN_RELE(vp);
609 return (cl_ss);
610 }
611
612 #ifdef nextdp
613 #undef nextdp
614 #endif
615 #define nextdp(dp) ((struct dirent64 *)((char *)(dp) + (dp)->d_reclen))
616
617 /*
618 * Add entries from statedir to supplied oldstate list.
619 * Optionally, move all entries from statedir -> destdir.
620 */
621 void
rfs4_ss_oldstate(rfs4_oldstate_t * oldstate,char * statedir,char * destdir)622 rfs4_ss_oldstate(rfs4_oldstate_t *oldstate, char *statedir, char *destdir)
623 {
624 rfs4_ss_pn_t *ss_pn;
625 rfs4_oldstate_t *cl_ss = NULL;
626 char *dirt = NULL;
627 int err, dir_eof = 0, size = 0;
628 vnode_t *dvp;
629 struct iovec iov;
630 struct uio uio;
631 struct dirent64 *dep;
632 offset_t dirchunk_offset = 0;
633
634 /*
635 * open the state directory
636 */
637 if (vn_open(statedir, UIO_SYSSPACE, FREAD, 0, &dvp, 0, 0))
638 return;
639
640 if (dvp->v_type != VDIR || VOP_ACCESS(dvp, VREAD, 0, CRED(), NULL))
641 goto out;
642
643 dirt = kmem_alloc(RFS4_SS_DIRSIZE, KM_SLEEP);
644
645 /*
646 * Get and process the directory entries
647 */
648 while (!dir_eof) {
649 (void) VOP_RWLOCK(dvp, V_WRITELOCK_FALSE, NULL);
650 iov.iov_base = dirt;
651 iov.iov_len = RFS4_SS_DIRSIZE;
652 uio.uio_iov = &iov;
653 uio.uio_iovcnt = 1;
654 uio.uio_segflg = UIO_SYSSPACE;
655 uio.uio_loffset = dirchunk_offset;
656 uio.uio_resid = RFS4_SS_DIRSIZE;
657
658 err = VOP_READDIR(dvp, &uio, CRED(), &dir_eof, NULL, 0);
659 VOP_RWUNLOCK(dvp, V_WRITELOCK_FALSE, NULL);
660 if (err)
661 goto out;
662
663 size = RFS4_SS_DIRSIZE - uio.uio_resid;
664
665 /*
666 * Process all the directory entries in this
667 * readdir chunk
668 */
669 for (dep = (struct dirent64 *)dirt; size > 0;
670 dep = nextdp(dep)) {
671
672 size -= dep->d_reclen;
673 dirchunk_offset = dep->d_off;
674
675 /*
676 * Skip '.' and '..'
677 */
678 if (NFS_IS_DOTNAME(dep->d_name))
679 continue;
680
681 ss_pn = rfs4_ss_pnalloc(statedir, dep->d_name);
682 if (ss_pn == NULL)
683 continue;
684
685 if (cl_ss = rfs4_ss_getstate(dvp, ss_pn)) {
686 if (destdir != NULL) {
687 rfs4_ss_pnfree(ss_pn);
688 cl_ss->ss_pn = rfs4_ss_movestate(
689 statedir, destdir, dep->d_name);
690 } else {
691 cl_ss->ss_pn = ss_pn;
692 }
693 insque(cl_ss, oldstate);
694 } else {
695 rfs4_ss_pnfree(ss_pn);
696 }
697 }
698 }
699
700 out:
701 (void) VOP_CLOSE(dvp, FREAD, 1, (offset_t)0, CRED(), NULL);
702 VN_RELE(dvp);
703 if (dirt)
704 kmem_free((caddr_t)dirt, RFS4_SS_DIRSIZE);
705 }
706
707 static void
rfs4_ss_init(void)708 rfs4_ss_init(void)
709 {
710 int npaths = 1;
711 char *default_dss_path = NFS4_DSS_VAR_DIR;
712
713 /* read the default stable storage state */
714 rfs4_dss_readstate(npaths, &default_dss_path);
715
716 rfs4_ss_enabled = 1;
717 }
718
719 static void
rfs4_ss_fini(void)720 rfs4_ss_fini(void)
721 {
722 rfs4_servinst_t *sip;
723
724 mutex_enter(&rfs4_servinst_lock);
725 sip = rfs4_cur_servinst;
726 while (sip != NULL) {
727 rfs4_dss_clear_oldstate(sip);
728 sip = sip->next;
729 }
730 mutex_exit(&rfs4_servinst_lock);
731 }
732
733 /*
734 * Remove all oldstate files referenced by this servinst.
735 */
736 static void
rfs4_dss_clear_oldstate(rfs4_servinst_t * sip)737 rfs4_dss_clear_oldstate(rfs4_servinst_t *sip)
738 {
739 rfs4_oldstate_t *os_head, *osp;
740
741 rw_enter(&sip->oldstate_lock, RW_WRITER);
742 os_head = sip->oldstate;
743
744 if (os_head == NULL) {
745 rw_exit(&sip->oldstate_lock);
746 return;
747 }
748
749 /* skip dummy entry */
750 osp = os_head->next;
751 while (osp != os_head) {
752 char *leaf = osp->ss_pn->leaf;
753 rfs4_oldstate_t *os_next;
754
755 rfs4_dss_remove_leaf(sip, NFS4_DSS_OLDSTATE_LEAF, leaf);
756
757 if (osp->cl_id4.id_val)
758 kmem_free(osp->cl_id4.id_val, osp->cl_id4.id_len);
759 rfs4_ss_pnfree(osp->ss_pn);
760
761 os_next = osp->next;
762 remque(osp);
763 kmem_free(osp, sizeof (rfs4_oldstate_t));
764 osp = os_next;
765 }
766
767 rw_exit(&sip->oldstate_lock);
768 }
769
770 /*
771 * Form the state and oldstate paths, and read in the stable storage files.
772 */
773 void
rfs4_dss_readstate(int npaths,char ** paths)774 rfs4_dss_readstate(int npaths, char **paths)
775 {
776 int i;
777 char *state, *oldstate;
778
779 state = kmem_alloc(MAXPATHLEN, KM_SLEEP);
780 oldstate = kmem_alloc(MAXPATHLEN, KM_SLEEP);
781
782 for (i = 0; i < npaths; i++) {
783 char *path = paths[i];
784
785 (void) sprintf(state, "%s/%s", path, NFS4_DSS_STATE_LEAF);
786 (void) sprintf(oldstate, "%s/%s", path, NFS4_DSS_OLDSTATE_LEAF);
787
788 /*
789 * Populate the current server instance's oldstate list.
790 *
791 * 1. Read stable storage data from old state directory,
792 * leaving its contents alone.
793 *
794 * 2. Read stable storage data from state directory,
795 * and move the latter's contents to old state
796 * directory.
797 */
798 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, oldstate, NULL);
799 rfs4_ss_oldstate(rfs4_cur_servinst->oldstate, state, oldstate);
800 }
801
802 kmem_free(state, MAXPATHLEN);
803 kmem_free(oldstate, MAXPATHLEN);
804 }
805
806
807 /*
808 * Check if we are still in grace and if the client can be
809 * granted permission to perform reclaims.
810 */
811 void
rfs4_ss_chkclid(rfs4_client_t * cp)812 rfs4_ss_chkclid(rfs4_client_t *cp)
813 {
814 rfs4_servinst_t *sip;
815
816 /*
817 * It should be sufficient to check the oldstate data for just
818 * this client's instance. However, since our per-instance
819 * client grouping is solely temporal, HA-NFSv4 RG failover
820 * might result in clients of the same RG being partitioned into
821 * separate instances.
822 *
823 * Until the client grouping is improved, we must check the
824 * oldstate data for all instances with an active grace period.
825 *
826 * This also serves as the mechanism to remove stale oldstate data.
827 * The first time we check an instance after its grace period has
828 * expired, the oldstate data should be cleared.
829 *
830 * Start at the current instance, and walk the list backwards
831 * to the first.
832 */
833 mutex_enter(&rfs4_servinst_lock);
834 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
835 rfs4_ss_chkclid_sip(cp, sip);
836
837 /* if the above check found this client, we're done */
838 if (cp->rc_can_reclaim)
839 break;
840 }
841 mutex_exit(&rfs4_servinst_lock);
842 }
843
844 static void
rfs4_ss_chkclid_sip(rfs4_client_t * cp,rfs4_servinst_t * sip)845 rfs4_ss_chkclid_sip(rfs4_client_t *cp, rfs4_servinst_t *sip)
846 {
847 rfs4_oldstate_t *osp, *os_head;
848
849 /* short circuit everything if this server instance has no oldstate */
850 rw_enter(&sip->oldstate_lock, RW_READER);
851 os_head = sip->oldstate;
852 rw_exit(&sip->oldstate_lock);
853 if (os_head == NULL)
854 return;
855
856 /*
857 * If this server instance is no longer in a grace period then
858 * the client won't be able to reclaim. No further need for this
859 * instance's oldstate data, so it can be cleared.
860 */
861 if (!rfs4_servinst_in_grace(sip))
862 return;
863
864 /* this instance is still in grace; search for the clientid */
865
866 rw_enter(&sip->oldstate_lock, RW_READER);
867
868 os_head = sip->oldstate;
869 /* skip dummy entry */
870 osp = os_head->next;
871 while (osp != os_head) {
872 if (osp->cl_id4.id_len == cp->rc_nfs_client.id_len) {
873 if (bcmp(osp->cl_id4.id_val, cp->rc_nfs_client.id_val,
874 osp->cl_id4.id_len) == 0) {
875 cp->rc_can_reclaim = 1;
876 break;
877 }
878 }
879 osp = osp->next;
880 }
881
882 rw_exit(&sip->oldstate_lock);
883 }
884
885 /*
886 * Place client information into stable storage: 1/3.
887 * First, generate the leaf filename, from the client's IP address and
888 * the server-generated short-hand clientid.
889 */
890 void
rfs4_ss_clid(rfs4_client_t * cp)891 rfs4_ss_clid(rfs4_client_t *cp)
892 {
893 const char *kinet_ntop6(uchar_t *, char *, size_t);
894 char leaf[MAXNAMELEN], buf[INET6_ADDRSTRLEN];
895 struct sockaddr *ca;
896 uchar_t *b;
897
898 if (rfs4_ss_enabled == 0) {
899 return;
900 }
901
902 buf[0] = 0;
903
904 ca = (struct sockaddr *)&cp->rc_addr;
905
906 /*
907 * Convert the caller's IP address to a dotted string
908 */
909 if (ca->sa_family == AF_INET) {
910 b = (uchar_t *)&((struct sockaddr_in *)ca)->sin_addr;
911 (void) sprintf(buf, "%03d.%03d.%03d.%03d", b[0] & 0xFF,
912 b[1] & 0xFF, b[2] & 0xFF, b[3] & 0xFF);
913 } else if (ca->sa_family == AF_INET6) {
914 struct sockaddr_in6 *sin6;
915
916 sin6 = (struct sockaddr_in6 *)ca;
917 (void) kinet_ntop6((uchar_t *)&sin6->sin6_addr,
918 buf, INET6_ADDRSTRLEN);
919 }
920
921 (void) snprintf(leaf, MAXNAMELEN, "%s-%llx", buf,
922 (longlong_t)cp->rc_clientid);
923 rfs4_ss_clid_write(cp, leaf);
924 }
925
926 /*
927 * Place client information into stable storage: 2/3.
928 * DSS: distributed stable storage: the file may need to be written to
929 * multiple directories.
930 */
931 static void
rfs4_ss_clid_write(rfs4_client_t * cp,char * leaf)932 rfs4_ss_clid_write(rfs4_client_t *cp, char *leaf)
933 {
934 rfs4_servinst_t *sip;
935
936 /*
937 * It should be sufficient to write the leaf file to (all) DSS paths
938 * associated with just this client's instance. However, since our
939 * per-instance client grouping is solely temporal, HA-NFSv4 RG
940 * failover might result in us losing DSS data.
941 *
942 * Until the client grouping is improved, we must write the DSS data
943 * to all instances' paths. Start at the current instance, and
944 * walk the list backwards to the first.
945 */
946 mutex_enter(&rfs4_servinst_lock);
947 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
948 int i, npaths = sip->dss_npaths;
949
950 /* write the leaf file to all DSS paths */
951 for (i = 0; i < npaths; i++) {
952 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
953
954 /* HA-NFSv4 path might have been failed-away from us */
955 if (dss_path == NULL)
956 continue;
957
958 rfs4_ss_clid_write_one(cp, dss_path->path, leaf);
959 }
960 }
961 mutex_exit(&rfs4_servinst_lock);
962 }
963
964 /*
965 * Place client information into stable storage: 3/3.
966 * Write the stable storage data to the requested file.
967 */
968 static void
rfs4_ss_clid_write_one(rfs4_client_t * cp,char * dss_path,char * leaf)969 rfs4_ss_clid_write_one(rfs4_client_t *cp, char *dss_path, char *leaf)
970 {
971 int ioflag;
972 int file_vers = NFS4_SS_VERSION;
973 size_t dirlen;
974 struct uio uio;
975 struct iovec iov[4];
976 char *dir;
977 rfs4_ss_pn_t *ss_pn;
978 vnode_t *vp;
979 nfs_client_id4 *cl_id4 = &(cp->rc_nfs_client);
980
981 /* allow 2 extra bytes for '/' & NUL */
982 dirlen = strlen(dss_path) + strlen(NFS4_DSS_STATE_LEAF) + 2;
983 dir = kmem_alloc(dirlen, KM_SLEEP);
984 (void) sprintf(dir, "%s/%s", dss_path, NFS4_DSS_STATE_LEAF);
985
986 ss_pn = rfs4_ss_pnalloc(dir, leaf);
987 /* rfs4_ss_pnalloc takes its own copy */
988 kmem_free(dir, dirlen);
989 if (ss_pn == NULL)
990 return;
991
992 if (vn_open(ss_pn->pn, UIO_SYSSPACE, FCREAT|FWRITE, 0600, &vp,
993 CRCREAT, 0)) {
994 rfs4_ss_pnfree(ss_pn);
995 return;
996 }
997
998 /*
999 * We need to record leaf - i.e. the filename - so that we know
1000 * what to remove, in the future. However, the dir part of cp->ss_pn
1001 * should never be referenced directly, since it's potentially only
1002 * one of several paths with this leaf in it.
1003 */
1004 if (cp->rc_ss_pn != NULL) {
1005 if (strcmp(cp->rc_ss_pn->leaf, leaf) == 0) {
1006 /* we've already recorded *this* leaf */
1007 rfs4_ss_pnfree(ss_pn);
1008 } else {
1009 /* replace with this leaf */
1010 rfs4_ss_pnfree(cp->rc_ss_pn);
1011 cp->rc_ss_pn = ss_pn;
1012 }
1013 } else {
1014 cp->rc_ss_pn = ss_pn;
1015 }
1016
1017 /*
1018 * Build a scatter list that points to the nfs_client_id4
1019 */
1020 iov[0].iov_base = (caddr_t)&file_vers;
1021 iov[0].iov_len = sizeof (int);
1022 iov[1].iov_base = (caddr_t)&(cl_id4->verifier);
1023 iov[1].iov_len = NFS4_VERIFIER_SIZE;
1024 iov[2].iov_base = (caddr_t)&(cl_id4->id_len);
1025 iov[2].iov_len = sizeof (uint_t);
1026 iov[3].iov_base = (caddr_t)cl_id4->id_val;
1027 iov[3].iov_len = cl_id4->id_len;
1028
1029 uio.uio_iov = iov;
1030 uio.uio_iovcnt = 4;
1031 uio.uio_loffset = 0;
1032 uio.uio_segflg = UIO_SYSSPACE;
1033 uio.uio_llimit = (rlim64_t)MAXOFFSET_T;
1034 uio.uio_resid = cl_id4->id_len + sizeof (int) +
1035 NFS4_VERIFIER_SIZE + sizeof (uint_t);
1036
1037 ioflag = uio.uio_fmode = (FWRITE|FSYNC);
1038 uio.uio_extflg = UIO_COPY_DEFAULT;
1039
1040 (void) VOP_RWLOCK(vp, V_WRITELOCK_TRUE, NULL);
1041 /* write the full client id to the file. */
1042 (void) VOP_WRITE(vp, &uio, ioflag, CRED(), NULL);
1043 VOP_RWUNLOCK(vp, V_WRITELOCK_TRUE, NULL);
1044
1045 (void) VOP_CLOSE(vp, FWRITE, 1, (offset_t)0, CRED(), NULL);
1046 VN_RELE(vp);
1047 }
1048
1049 /*
1050 * DSS: distributed stable storage.
1051 * Unpack the list of paths passed by nfsd.
1052 * Use nvlist_alloc(9F) to manage the data.
1053 * The caller is responsible for allocating and freeing the buffer.
1054 */
1055 int
rfs4_dss_setpaths(char * buf,size_t buflen)1056 rfs4_dss_setpaths(char *buf, size_t buflen)
1057 {
1058 int error;
1059
1060 /*
1061 * If this is a "warm start", i.e. we previously had DSS paths,
1062 * preserve the old paths.
1063 */
1064 if (rfs4_dss_paths != NULL) {
1065 /*
1066 * Before we lose the ptr, destroy the nvlist and pathnames
1067 * array from the warm start before this one.
1068 */
1069 nvlist_free(rfs4_dss_oldpaths);
1070 rfs4_dss_oldpaths = rfs4_dss_paths;
1071 }
1072
1073 /* unpack the buffer into a searchable nvlist */
1074 error = nvlist_unpack(buf, buflen, &rfs4_dss_paths, KM_SLEEP);
1075 if (error)
1076 return (error);
1077
1078 /*
1079 * Search the nvlist for the pathnames nvpair (which is the only nvpair
1080 * in the list, and record its location.
1081 */
1082 error = nvlist_lookup_string_array(rfs4_dss_paths, NFS4_DSS_NVPAIR_NAME,
1083 &rfs4_dss_newpaths, &rfs4_dss_numnewpaths);
1084 return (error);
1085 }
1086
1087 /*
1088 * Ultimately the nfssys() call NFS4_CLR_STATE endsup here
1089 * to find and mark the client for forced expire.
1090 */
1091 static void
rfs4_client_scrub(rfs4_entry_t ent,void * arg)1092 rfs4_client_scrub(rfs4_entry_t ent, void *arg)
1093 {
1094 rfs4_client_t *cp = (rfs4_client_t *)ent;
1095 struct nfs4clrst_args *clr = arg;
1096 struct sockaddr_in6 *ent_sin6;
1097 struct in6_addr clr_in6;
1098 struct sockaddr_in *ent_sin;
1099 struct in_addr clr_in;
1100
1101 if (clr->addr_type != cp->rc_addr.ss_family) {
1102 return;
1103 }
1104
1105 switch (clr->addr_type) {
1106
1107 case AF_INET6:
1108 /* copyin the address from user space */
1109 if (copyin(clr->ap, &clr_in6, sizeof (clr_in6))) {
1110 break;
1111 }
1112
1113 ent_sin6 = (struct sockaddr_in6 *)&cp->rc_addr;
1114
1115 /*
1116 * now compare, and if equivalent mark entry
1117 * for forced expiration
1118 */
1119 if (IN6_ARE_ADDR_EQUAL(&ent_sin6->sin6_addr, &clr_in6)) {
1120 cp->rc_forced_expire = 1;
1121 }
1122 break;
1123
1124 case AF_INET:
1125 /* copyin the address from user space */
1126 if (copyin(clr->ap, &clr_in, sizeof (clr_in))) {
1127 break;
1128 }
1129
1130 ent_sin = (struct sockaddr_in *)&cp->rc_addr;
1131
1132 /*
1133 * now compare, and if equivalent mark entry
1134 * for forced expiration
1135 */
1136 if (ent_sin->sin_addr.s_addr == clr_in.s_addr) {
1137 cp->rc_forced_expire = 1;
1138 }
1139 break;
1140
1141 default:
1142 /* force this assert to fail */
1143 ASSERT(clr->addr_type != clr->addr_type);
1144 }
1145 }
1146
/*
 * This is called from nfssys() in order to clear server state
 * for the specified client IP Address.
 *
 * Walks the client table (rfs4_client_tab), handing each entry together
 * with clr to rfs4_client_scrub(), which marks matching clients for
 * forced expiry.  The value returned by the walk is ignored.
 */
void
rfs4_clear_client_state(struct nfs4clrst_args *clr)
{
	(void) rfs4_dbe_walk(rfs4_client_tab, rfs4_client_scrub, clr);
}
1156
/*
 * Used to initialize the NFSv4 server's state or database.  All of
 * the tables are created and timers are set.  Only called when NFSv4
 * service is provided.
 *
 * The whole routine runs with rfs4_state_lock held.  It returns without
 * doing anything if rfs4_server_state is already set.  Otherwise it
 * bumps rfs4_start_time, creates the first server instance, registers
 * the CPR callback, creates the database with its tables and indices,
 * initialises stable storage and finally publishes
 * rfs4_clear_client_state through rfs4_client_clrst.
 */
void
rfs4_state_init()
{
	int start_grace;
	extern boolean_t rfs4_cpr_callb(void *, int);
	char *dss_path = NFS4_DSS_VAR_DIR;
	time_t start_time;

	mutex_enter(&rfs4_state_lock);

	/*
	 * If the server state database has already been initialized,
	 * skip it
	 */
	if (rfs4_server_state != NULL) {
		mutex_exit(&rfs4_state_lock);
		return;
	}

	rw_init(&rfs4_findclient_lock, NULL, RW_DEFAULT, NULL);

	/*
	 * Set the boot time.  If the server
	 * has been restarted quickly and has had the opportunity to
	 * service clients, then the start_time needs to be bumped
	 * regardless.  A small window but it exists...
	 */
	start_time = gethrestime_sec();
	if (rfs4_start_time < start_time)
		rfs4_start_time = start_time;
	else
		rfs4_start_time++;

	/* DSS: distributed stable storage: initialise served paths list */
	rfs4_dss_pathlist = NULL;

	/*
	 * Create the first server instance, or a new one if the server has
	 * been restarted; see above comments on rfs4_start_time.  Don't
	 * start its grace period; that will be done later, to maximise the
	 * clients' recovery window.
	 */
	start_grace = 0;
	rfs4_servinst_create(start_grace, 1, &dss_path);

	/* reset the "first NFSv4 request" status */
	rfs4_seen_first_compound = 0;

	/*
	 * Add a CPR callback so that we can update client
	 * access times to extend the lease after a suspend
	 * and resume (using the same class as rpcmod/connmgr)
	 */
	cpr_id = callb_add(rfs4_cpr_callb, 0, CB_CL_CPR_RPC, "rfs4");

	/*
	 * set the various cache timers for table creation; a value of
	 * zero means "not set" and selects the compiled-in default
	 */
	if (rfs4_client_cache_time == 0)
		rfs4_client_cache_time = CLIENT_CACHE_TIME;
	if (rfs4_openowner_cache_time == 0)
		rfs4_openowner_cache_time = OPENOWNER_CACHE_TIME;
	if (rfs4_state_cache_time == 0)
		rfs4_state_cache_time = STATE_CACHE_TIME;
	if (rfs4_lo_state_cache_time == 0)
		rfs4_lo_state_cache_time = LO_STATE_CACHE_TIME;
	if (rfs4_lockowner_cache_time == 0)
		rfs4_lockowner_cache_time = LOCKOWNER_CACHE_TIME;
	if (rfs4_file_cache_time == 0)
		rfs4_file_cache_time = FILE_CACHE_TIME;
	if (rfs4_deleg_state_cache_time == 0)
		rfs4_deleg_state_cache_time = DELEG_STATE_CACHE_TIME;

	/* Create the overall database to hold all server state */
	rfs4_server_state = rfs4_database_create(rfs4_database_debug);

	/*
	 * Now create the individual tables.  Each cache time above is
	 * scaled by rfs4_lease_time before it is handed to the table.
	 */

	/* Client table: indexed by nfs_client_id4 and by clientid */
	rfs4_client_cache_time *= rfs4_lease_time;
	rfs4_client_tab = rfs4_table_create(rfs4_server_state,
	    "Client",
	    rfs4_client_cache_time,
	    2,
	    rfs4_client_create,
	    rfs4_client_destroy,
	    rfs4_client_expiry,
	    sizeof (rfs4_client_t),
	    TABSIZE,
	    MAXTABSZ/8, 100);
	rfs4_nfsclnt_idx = rfs4_index_create(rfs4_client_tab,
	    "nfs_client_id4", nfsclnt_hash,
	    nfsclnt_compare, nfsclnt_mkkey,
	    TRUE);
	rfs4_clientid_idx = rfs4_index_create(rfs4_client_tab,
	    "client_id", clientid_hash,
	    clientid_compare, clientid_mkkey,
	    FALSE);

	/* Client IP table: indexed by client address */
	rfs4_clntip_cache_time = 86400 * 365;	/* about a year */
	rfs4_clntip_tab = rfs4_table_create(rfs4_server_state,
	    "ClntIP",
	    rfs4_clntip_cache_time,
	    1,
	    rfs4_clntip_create,
	    rfs4_clntip_destroy,
	    rfs4_clntip_expiry,
	    sizeof (rfs4_clntip_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_clntip_idx = rfs4_index_create(rfs4_clntip_tab,
	    "client_ip", clntip_hash,
	    clntip_compare, clntip_mkkey,
	    TRUE);

	/* Open owner table: indexed by open_owner4 */
	rfs4_openowner_cache_time *= rfs4_lease_time;
	rfs4_openowner_tab = rfs4_table_create(rfs4_server_state,
	    "OpenOwner",
	    rfs4_openowner_cache_time,
	    1,
	    rfs4_openowner_create,
	    rfs4_openowner_destroy,
	    rfs4_openowner_expiry,
	    sizeof (rfs4_openowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_openowner_idx = rfs4_index_create(rfs4_openowner_tab,
	    "open_owner4", openowner_hash,
	    openowner_compare,
	    openowner_mkkey, TRUE);

	/* Open state table: three indices */
	rfs4_state_cache_time *= rfs4_lease_time;
	rfs4_state_tab = rfs4_table_create(rfs4_server_state,
	    "OpenStateID",
	    rfs4_state_cache_time,
	    3,
	    rfs4_state_create,
	    rfs4_state_destroy,
	    rfs4_state_expiry,
	    sizeof (rfs4_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_state_owner_file_idx = rfs4_index_create(rfs4_state_tab,
	    "Openowner-File",
	    state_owner_file_hash,
	    state_owner_file_compare,
	    state_owner_file_mkkey, TRUE);

	rfs4_state_idx = rfs4_index_create(rfs4_state_tab,
	    "State-id", state_hash,
	    state_compare, state_mkkey, FALSE);

	rfs4_state_file_idx = rfs4_index_create(rfs4_state_tab,
	    "File", state_file_hash,
	    state_file_compare, state_file_mkkey,
	    FALSE);

	/* Lock state table: two indices */
	rfs4_lo_state_cache_time *= rfs4_lease_time;
	rfs4_lo_state_tab = rfs4_table_create(rfs4_server_state,
	    "LockStateID",
	    rfs4_lo_state_cache_time,
	    2,
	    rfs4_lo_state_create,
	    rfs4_lo_state_destroy,
	    rfs4_lo_state_expiry,
	    sizeof (rfs4_lo_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lo_state_owner_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "lockownerxstate",
	    lo_state_lo_hash,
	    lo_state_lo_compare,
	    lo_state_lo_mkkey, TRUE);

	rfs4_lo_state_idx = rfs4_index_create(rfs4_lo_state_tab,
	    "State-id",
	    lo_state_hash, lo_state_compare,
	    lo_state_mkkey, FALSE);

	/* Lock owner table: indexed by lock_owner4 and by pid */
	rfs4_lockowner_cache_time *= rfs4_lease_time;

	rfs4_lockowner_tab = rfs4_table_create(rfs4_server_state,
	    "Lockowner",
	    rfs4_lockowner_cache_time,
	    2,
	    rfs4_lockowner_create,
	    rfs4_lockowner_destroy,
	    rfs4_lockowner_expiry,
	    sizeof (rfs4_lockowner_t),
	    TABSIZE,
	    MAXTABSZ, 100);

	rfs4_lockowner_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "lock_owner4", lockowner_hash,
	    lockowner_compare,
	    lockowner_mkkey, TRUE);

	rfs4_lockowner_pid_idx = rfs4_index_create(rfs4_lockowner_tab,
	    "pid", pid_hash,
	    pid_compare, pid_mkkey,
	    FALSE);

	/* File table: created without an expiry callback (NULL) */
	rfs4_file_cache_time *= rfs4_lease_time;
	rfs4_file_tab = rfs4_table_create(rfs4_server_state,
	    "File",
	    rfs4_file_cache_time,
	    1,
	    rfs4_file_create,
	    rfs4_file_destroy,
	    NULL,
	    sizeof (rfs4_file_t),
	    TABSIZE,
	    MAXTABSZ, -1);

	rfs4_file_idx = rfs4_index_create(rfs4_file_tab,
	    "Filehandle", file_hash,
	    file_compare, file_mkkey, TRUE);

	/* Delegation state table: two indices */
	rfs4_deleg_state_cache_time *= rfs4_lease_time;
	rfs4_deleg_state_tab = rfs4_table_create(rfs4_server_state,
	    "DelegStateID",
	    rfs4_deleg_state_cache_time,
	    2,
	    rfs4_deleg_state_create,
	    rfs4_deleg_state_destroy,
	    rfs4_deleg_state_expiry,
	    sizeof (rfs4_deleg_state_t),
	    TABSIZE,
	    MAXTABSZ, 100);
	rfs4_deleg_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegByFileClient",
	    deleg_hash,
	    deleg_compare,
	    deleg_mkkey, TRUE);

	rfs4_deleg_state_idx = rfs4_index_create(rfs4_deleg_state_tab,
	    "DelegState",
	    deleg_state_hash,
	    deleg_state_compare,
	    deleg_state_mkkey, FALSE);

	/*
	 * Init the stable storage.
	 */
	rfs4_ss_init();

	/* publish the clear-state entry point used by nfssys() */
	rfs4_client_clrst = rfs4_clear_client_state;

	mutex_exit(&rfs4_state_lock);
}
1410
1411
/*
 * Used at server shutdown to cleanup all of the NFSv4 server's structures
 * and other state.
 *
 * Does nothing if rfs4_server_state is NULL.  Otherwise, with
 * rfs4_state_lock held, it withdraws the clear-state hook, sets the
 * delegation policy to SRV_NEVER_DELEGATE, detaches the database pointer
 * from rfs4_server_state, removes the CPR callback, shuts down and
 * destroys the database, and zeroes the cache timers so that the next
 * rfs4_state_init() picks the defaults again.  The server instances and
 * the DSS nvlists are released after the lock has been dropped.
 */
void
rfs4_state_fini()
{
	rfs4_database_t *dbp;

	mutex_enter(&rfs4_state_lock);

	/* nothing to tear down if the state was never initialised */
	if (rfs4_server_state == NULL) {
		mutex_exit(&rfs4_state_lock);
		return;
	}

	rfs4_client_clrst = NULL;

	rfs4_set_deleg_policy(SRV_NEVER_DELEGATE);
	/* keep a private handle; the global is cleared right away */
	dbp = rfs4_server_state;
	rfs4_server_state = NULL;

	/*
	 * Cleanup the CPR callback.
	 */
	if (cpr_id)
		(void) callb_delete(cpr_id);

	rw_destroy(&rfs4_findclient_lock);

	/* First stop all of the reaper threads in the database */
	rfs4_database_shutdown(dbp);
	/* clean up any dangling stable storage structures */
	rfs4_ss_fini();
	/* Now actually destroy/release the database and its tables */
	rfs4_database_destroy(dbp);

	/* Reset the cache timers for next time */
	rfs4_client_cache_time = 0;
	rfs4_openowner_cache_time = 0;
	rfs4_state_cache_time = 0;
	rfs4_lo_state_cache_time = 0;
	rfs4_lockowner_cache_time = 0;
	rfs4_file_cache_time = 0;
	rfs4_deleg_state_cache_time = 0;

	mutex_exit(&rfs4_state_lock);

	/* destroy server instances and current instance ptr */
	rfs4_servinst_destroy_all();

	/* reset the "first NFSv4 request" status */
	rfs4_seen_first_compound = 0;

	/* DSS: distributed stable storage */
	nvlist_free(rfs4_dss_oldpaths);
	nvlist_free(rfs4_dss_paths);
	rfs4_dss_paths = rfs4_dss_oldpaths = NULL;
}
1471
/*
 * Server-side view of a clientid4.  The opaque id4 is overlaid with two
 * 32-bit halves: rfs4_client_create() stores rfs4_start_time in
 * start_time and the database entry id in c_id.
 */
typedef union {
	struct {
		uint32_t start_time;
		uint32_t c_id;
	} impl_id;
	clientid4 id4;
} cid;
1479
1480 static int foreign_stateid(stateid_t *id);
1481 static int foreign_clientid(cid *cidp);
1482 static void embed_nodeid(cid *cidp);
1483
/*
 * Server-side view of the SETCLIENTID_CONFIRM verifier.  The verifier4
 * is overlaid with the client's c_id and a generation number;
 * rfs4_client_create() starts gen_num at 0 and rfs4_client_scv_next()
 * increments it.
 */
typedef union {
	struct {
		uint32_t c_id;
		uint32_t gen_num;
	} cv_impl;
	verifier4 confirm_verf;
} scid_confirm_verf;
1491
1492 static uint32_t
clientid_hash(void * key)1493 clientid_hash(void *key)
1494 {
1495 cid *idp = key;
1496
1497 return (idp->impl_id.c_id);
1498 }
1499
1500 static bool_t
clientid_compare(rfs4_entry_t entry,void * key)1501 clientid_compare(rfs4_entry_t entry, void *key)
1502 {
1503 rfs4_client_t *cp = (rfs4_client_t *)entry;
1504 clientid4 *idp = key;
1505
1506 return (*idp == cp->rc_clientid);
1507 }
1508
1509 static void *
clientid_mkkey(rfs4_entry_t entry)1510 clientid_mkkey(rfs4_entry_t entry)
1511 {
1512 rfs4_client_t *cp = (rfs4_client_t *)entry;
1513
1514 return (&cp->rc_clientid);
1515 }
1516
1517 static uint32_t
nfsclnt_hash(void * key)1518 nfsclnt_hash(void *key)
1519 {
1520 nfs_client_id4 *client = key;
1521 int i;
1522 uint32_t hash = 0;
1523
1524 for (i = 0; i < client->id_len; i++) {
1525 hash <<= 1;
1526 hash += (uint_t)client->id_val[i];
1527 }
1528 return (hash);
1529 }
1530
1531
1532 static bool_t
nfsclnt_compare(rfs4_entry_t entry,void * key)1533 nfsclnt_compare(rfs4_entry_t entry, void *key)
1534 {
1535 rfs4_client_t *cp = (rfs4_client_t *)entry;
1536 nfs_client_id4 *nfs_client = key;
1537
1538 if (cp->rc_nfs_client.id_len != nfs_client->id_len)
1539 return (FALSE);
1540
1541 return (bcmp(cp->rc_nfs_client.id_val, nfs_client->id_val,
1542 nfs_client->id_len) == 0);
1543 }
1544
1545 static void *
nfsclnt_mkkey(rfs4_entry_t entry)1546 nfsclnt_mkkey(rfs4_entry_t entry)
1547 {
1548 rfs4_client_t *cp = (rfs4_client_t *)entry;
1549
1550 return (&cp->rc_nfs_client);
1551 }
1552
1553 static bool_t
rfs4_client_expiry(rfs4_entry_t u_entry)1554 rfs4_client_expiry(rfs4_entry_t u_entry)
1555 {
1556 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1557 bool_t cp_expired;
1558
1559 if (rfs4_dbe_is_invalid(cp->rc_dbe)) {
1560 cp->rc_ss_remove = 1;
1561 return (TRUE);
1562 }
1563 /*
1564 * If the sysadmin has used clear_locks for this
1565 * entry then forced_expire will be set and we
1566 * want this entry to be reaped. Or the entry
1567 * has exceeded its lease period.
1568 */
1569 cp_expired = (cp->rc_forced_expire ||
1570 (gethrestime_sec() - cp->rc_last_access
1571 > rfs4_lease_time));
1572
1573 if (!cp->rc_ss_remove && cp_expired)
1574 cp->rc_ss_remove = 1;
1575 return (cp_expired);
1576 }
1577
1578 /*
1579 * Remove the leaf file from all distributed stable storage paths.
1580 */
1581 static void
rfs4_dss_remove_cpleaf(rfs4_client_t * cp)1582 rfs4_dss_remove_cpleaf(rfs4_client_t *cp)
1583 {
1584 rfs4_servinst_t *sip;
1585 char *leaf = cp->rc_ss_pn->leaf;
1586
1587 /*
1588 * since the state files are written to all DSS
1589 * paths we must remove this leaf file instance
1590 * from all server instances.
1591 */
1592
1593 mutex_enter(&rfs4_servinst_lock);
1594 for (sip = rfs4_cur_servinst; sip != NULL; sip = sip->prev) {
1595 /* remove the leaf file associated with this server instance */
1596 rfs4_dss_remove_leaf(sip, NFS4_DSS_STATE_LEAF, leaf);
1597 }
1598 mutex_exit(&rfs4_servinst_lock);
1599 }
1600
1601 static void
rfs4_dss_remove_leaf(rfs4_servinst_t * sip,char * dir_leaf,char * leaf)1602 rfs4_dss_remove_leaf(rfs4_servinst_t *sip, char *dir_leaf, char *leaf)
1603 {
1604 int i, npaths = sip->dss_npaths;
1605
1606 for (i = 0; i < npaths; i++) {
1607 rfs4_dss_path_t *dss_path = sip->dss_paths[i];
1608 char *path, *dir;
1609 size_t pathlen;
1610
1611 /* the HA-NFSv4 path might have been failed-over away from us */
1612 if (dss_path == NULL)
1613 continue;
1614
1615 dir = dss_path->path;
1616
1617 /* allow 3 extra bytes for two '/' & a NUL */
1618 pathlen = strlen(dir) + strlen(dir_leaf) + strlen(leaf) + 3;
1619 path = kmem_alloc(pathlen, KM_SLEEP);
1620 (void) sprintf(path, "%s/%s/%s", dir, dir_leaf, leaf);
1621
1622 (void) vn_remove(path, UIO_SYSSPACE, RMFILE);
1623
1624 kmem_free(path, pathlen);
1625 }
1626 }
1627
1628 static void
rfs4_client_destroy(rfs4_entry_t u_entry)1629 rfs4_client_destroy(rfs4_entry_t u_entry)
1630 {
1631 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1632
1633 mutex_destroy(cp->rc_cbinfo.cb_lock);
1634 cv_destroy(cp->rc_cbinfo.cb_cv);
1635 cv_destroy(cp->rc_cbinfo.cb_cv_nullcaller);
1636 list_destroy(&cp->rc_openownerlist);
1637
1638 /* free callback info */
1639 rfs4_cbinfo_free(&cp->rc_cbinfo);
1640
1641 if (cp->rc_cp_confirmed)
1642 rfs4_client_rele(cp->rc_cp_confirmed);
1643
1644 if (cp->rc_ss_pn) {
1645 /* check if the stable storage files need to be removed */
1646 if (cp->rc_ss_remove)
1647 rfs4_dss_remove_cpleaf(cp);
1648 rfs4_ss_pnfree(cp->rc_ss_pn);
1649 }
1650
1651 /* Free the client supplied client id */
1652 kmem_free(cp->rc_nfs_client.id_val, cp->rc_nfs_client.id_len);
1653
1654 if (cp->rc_sysidt != LM_NOSYSID)
1655 lm_free_sysidt(cp->rc_sysidt);
1656 }
1657
1658 static bool_t
rfs4_client_create(rfs4_entry_t u_entry,void * arg)1659 rfs4_client_create(rfs4_entry_t u_entry, void *arg)
1660 {
1661 rfs4_client_t *cp = (rfs4_client_t *)u_entry;
1662 nfs_client_id4 *client = (nfs_client_id4 *)arg;
1663 struct sockaddr *ca;
1664 cid *cidp;
1665 scid_confirm_verf *scvp;
1666
1667 /* Get a clientid to give to the client */
1668 cidp = (cid *)&cp->rc_clientid;
1669 cidp->impl_id.start_time = rfs4_start_time;
1670 cidp->impl_id.c_id = (uint32_t)rfs4_dbe_getid(cp->rc_dbe);
1671
1672 /* If we are booted as a cluster node, embed our nodeid */
1673 if (cluster_bootflags & CLUSTER_BOOTED)
1674 embed_nodeid(cidp);
1675
1676 /* Allocate and copy client's client id value */
1677 cp->rc_nfs_client.id_val = kmem_alloc(client->id_len, KM_SLEEP);
1678 cp->rc_nfs_client.id_len = client->id_len;
1679 bcopy(client->id_val, cp->rc_nfs_client.id_val, client->id_len);
1680 cp->rc_nfs_client.verifier = client->verifier;
1681
1682 /* Copy client's IP address */
1683 ca = client->cl_addr;
1684 if (ca->sa_family == AF_INET)
1685 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in));
1686 else if (ca->sa_family == AF_INET6)
1687 bcopy(ca, &cp->rc_addr, sizeof (struct sockaddr_in6));
1688 cp->rc_nfs_client.cl_addr = (struct sockaddr *)&cp->rc_addr;
1689
1690 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1691 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1692 scvp->cv_impl.c_id = cidp->impl_id.c_id;
1693 scvp->cv_impl.gen_num = 0;
1694
1695 /* An F_UNLKSYS has been done for this client */
1696 cp->rc_unlksys_completed = FALSE;
1697
1698 /* We need the client to ack us */
1699 cp->rc_need_confirm = TRUE;
1700 cp->rc_cp_confirmed = NULL;
1701
1702 /* TRUE all the time until the callback path actually fails */
1703 cp->rc_cbinfo.cb_notified_of_cb_path_down = TRUE;
1704
1705 /* Initialize the access time to now */
1706 cp->rc_last_access = gethrestime_sec();
1707
1708 cp->rc_cr_set = NULL;
1709
1710 cp->rc_sysidt = LM_NOSYSID;
1711
1712 list_create(&cp->rc_openownerlist, sizeof (rfs4_openowner_t),
1713 offsetof(rfs4_openowner_t, ro_node));
1714
1715 /* set up the callback control structure */
1716 cp->rc_cbinfo.cb_state = CB_UNINIT;
1717 mutex_init(cp->rc_cbinfo.cb_lock, NULL, MUTEX_DEFAULT, NULL);
1718 cv_init(cp->rc_cbinfo.cb_cv, NULL, CV_DEFAULT, NULL);
1719 cv_init(cp->rc_cbinfo.cb_cv_nullcaller, NULL, CV_DEFAULT, NULL);
1720
1721 /*
1722 * Associate the client_t with the current server instance.
1723 * The hold is solely to satisfy the calling requirement of
1724 * rfs4_servinst_assign(). In this case it's not strictly necessary.
1725 */
1726 rfs4_dbe_hold(cp->rc_dbe);
1727 rfs4_servinst_assign(cp, rfs4_cur_servinst);
1728 rfs4_dbe_rele(cp->rc_dbe);
1729
1730 return (TRUE);
1731 }
1732
1733 /*
1734 * Caller wants to generate/update the setclientid_confirm verifier
1735 * associated with a client. This is done during the SETCLIENTID
1736 * processing.
1737 */
1738 void
rfs4_client_scv_next(rfs4_client_t * cp)1739 rfs4_client_scv_next(rfs4_client_t *cp)
1740 {
1741 scid_confirm_verf *scvp;
1742
1743 /* Init the value for the SETCLIENTID_CONFIRM verifier */
1744 scvp = (scid_confirm_verf *)&cp->rc_confirm_verf;
1745 scvp->cv_impl.gen_num++;
1746 }
1747
/*
 * Release a reference on a client entry by releasing its database
 * entry (rc_dbe).
 */
void
rfs4_client_rele(rfs4_client_t *cp)
{
	rfs4_dbe_rele(cp->rc_dbe);
}
1753
1754 rfs4_client_t *
rfs4_findclient(nfs_client_id4 * client,bool_t * create,rfs4_client_t * oldcp)1755 rfs4_findclient(nfs_client_id4 *client, bool_t *create, rfs4_client_t *oldcp)
1756 {
1757 rfs4_client_t *cp;
1758
1759
1760 if (oldcp) {
1761 rw_enter(&rfs4_findclient_lock, RW_WRITER);
1762 rfs4_dbe_hide(oldcp->rc_dbe);
1763 } else {
1764 rw_enter(&rfs4_findclient_lock, RW_READER);
1765 }
1766
1767 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_nfsclnt_idx, client,
1768 create, (void *)client, RFS4_DBS_VALID);
1769
1770 if (oldcp)
1771 rfs4_dbe_unhide(oldcp->rc_dbe);
1772
1773 rw_exit(&rfs4_findclient_lock);
1774
1775 return (cp);
1776 }
1777
1778 rfs4_client_t *
rfs4_findclient_by_id(clientid4 clientid,bool_t find_unconfirmed)1779 rfs4_findclient_by_id(clientid4 clientid, bool_t find_unconfirmed)
1780 {
1781 rfs4_client_t *cp;
1782 bool_t create = FALSE;
1783 cid *cidp = (cid *)&clientid;
1784
1785 /* If we're a cluster and the nodeid isn't right, short-circuit */
1786 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
1787 return (NULL);
1788
1789 rw_enter(&rfs4_findclient_lock, RW_READER);
1790
1791 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx, &clientid,
1792 &create, NULL, RFS4_DBS_VALID);
1793
1794 rw_exit(&rfs4_findclient_lock);
1795
1796 if (cp && cp->rc_need_confirm && find_unconfirmed == FALSE) {
1797 rfs4_client_rele(cp);
1798 return (NULL);
1799 } else {
1800 return (cp);
1801 }
1802 }
1803
/*
 * Hash function for the "client_ip" index.  key is a struct sockaddr;
 * only the address bytes (in_addr or in6_addr) take part in the hash.
 * The running value is shifted left by one bit before each byte is
 * added.  Any family other than AF_INET/AF_INET6 hashes to 0.
 */
static uint32_t
clntip_hash(void *key)
{
	struct sockaddr *sa = key;
	char *bytes;
	uint32_t sum = 0;
	int nbytes, n;

	switch (sa->sa_family) {
	case AF_INET:
		bytes = (char *)&((struct sockaddr_in *)sa)->sin_addr;
		nbytes = sizeof (struct in_addr);
		break;
	case AF_INET6:
		bytes = (char *)&((struct sockaddr_in6 *)sa)->sin6_addr;
		nbytes = sizeof (struct in6_addr);
		break;
	default:
		return (0);
	}

	for (n = 0; n < nbytes; n++)
		sum = (sum << 1) + (uint32_t)bytes[n];

	return (sum);
}
1829
1830 static bool_t
clntip_compare(rfs4_entry_t entry,void * key)1831 clntip_compare(rfs4_entry_t entry, void *key)
1832 {
1833 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1834 struct sockaddr *addr = key;
1835 int len = 0;
1836 char *p1, *p2;
1837
1838 if (addr->sa_family == AF_INET) {
1839 struct sockaddr_in *a1 = (struct sockaddr_in *)&cp->ri_addr;
1840 struct sockaddr_in *a2 = (struct sockaddr_in *)addr;
1841 len = sizeof (struct in_addr);
1842 p1 = (char *)&a1->sin_addr;
1843 p2 = (char *)&a2->sin_addr;
1844 } else if (addr->sa_family == AF_INET6) {
1845 struct sockaddr_in6 *a1 = (struct sockaddr_in6 *)&cp->ri_addr;
1846 struct sockaddr_in6 *a2 = (struct sockaddr_in6 *)addr;
1847 len = sizeof (struct in6_addr);
1848 p1 = (char *)&a1->sin6_addr;
1849 p2 = (char *)&a2->sin6_addr;
1850 } else
1851 return (0);
1852
1853 return (bcmp(p1, p2, len) == 0);
1854 }
1855
1856 static void *
clntip_mkkey(rfs4_entry_t entry)1857 clntip_mkkey(rfs4_entry_t entry)
1858 {
1859 rfs4_clntip_t *cp = (rfs4_clntip_t *)entry;
1860
1861 return (&cp->ri_addr);
1862 }
1863
1864 static bool_t
rfs4_clntip_expiry(rfs4_entry_t u_entry)1865 rfs4_clntip_expiry(rfs4_entry_t u_entry)
1866 {
1867 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1868
1869 if (rfs4_dbe_is_invalid(cp->ri_dbe))
1870 return (TRUE);
1871 return (FALSE);
1872 }
1873
/*
 * Destroy callback for the client IP table.  Intentionally empty:
 * rfs4_clntip_create() allocates nothing that would need releasing.
 */
/* ARGSUSED */
static void
rfs4_clntip_destroy(rfs4_entry_t u_entry)
{
}
1879
1880 static bool_t
rfs4_clntip_create(rfs4_entry_t u_entry,void * arg)1881 rfs4_clntip_create(rfs4_entry_t u_entry, void *arg)
1882 {
1883 rfs4_clntip_t *cp = (rfs4_clntip_t *)u_entry;
1884 struct sockaddr *ca = (struct sockaddr *)arg;
1885
1886 /* Copy client's IP address */
1887 if (ca->sa_family == AF_INET)
1888 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in));
1889 else if (ca->sa_family == AF_INET6)
1890 bcopy(ca, &cp->ri_addr, sizeof (struct sockaddr_in6));
1891 else
1892 return (FALSE);
1893 cp->ri_no_referrals = 1;
1894
1895 return (TRUE);
1896 }
1897
1898 rfs4_clntip_t *
rfs4_find_clntip(struct sockaddr * addr,bool_t * create)1899 rfs4_find_clntip(struct sockaddr *addr, bool_t *create)
1900 {
1901 rfs4_clntip_t *cp;
1902
1903 rw_enter(&rfs4_findclient_lock, RW_READER);
1904
1905 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1906 create, addr, RFS4_DBS_VALID);
1907
1908 rw_exit(&rfs4_findclient_lock);
1909
1910 return (cp);
1911 }
1912
1913 void
rfs4_invalidate_clntip(struct sockaddr * addr)1914 rfs4_invalidate_clntip(struct sockaddr *addr)
1915 {
1916 rfs4_clntip_t *cp;
1917 bool_t create = FALSE;
1918
1919 rw_enter(&rfs4_findclient_lock, RW_READER);
1920
1921 cp = (rfs4_clntip_t *)rfs4_dbsearch(rfs4_clntip_idx, addr,
1922 &create, NULL, RFS4_DBS_VALID);
1923 if (cp == NULL) {
1924 rw_exit(&rfs4_findclient_lock);
1925 return;
1926 }
1927 rfs4_dbe_invalidate(cp->ri_dbe);
1928 rfs4_dbe_rele(cp->ri_dbe);
1929
1930 rw_exit(&rfs4_findclient_lock);
1931 }
1932
1933 bool_t
rfs4_lease_expired(rfs4_client_t * cp)1934 rfs4_lease_expired(rfs4_client_t *cp)
1935 {
1936 bool_t rc;
1937
1938 rfs4_dbe_lock(cp->rc_dbe);
1939
1940 /*
1941 * If the admin has executed clear_locks for this
1942 * client id, force expire will be set, so no need
1943 * to calculate anything because it's "outa here".
1944 */
1945 if (cp->rc_forced_expire) {
1946 rc = TRUE;
1947 } else {
1948 rc = (gethrestime_sec() - cp->rc_last_access > rfs4_lease_time);
1949 }
1950
1951 /*
1952 * If the lease has expired we will also want
1953 * to remove any stable storage state data. So
1954 * mark the client id accordingly.
1955 */
1956 if (!cp->rc_ss_remove)
1957 cp->rc_ss_remove = (rc == TRUE);
1958
1959 rfs4_dbe_unlock(cp->rc_dbe);
1960
1961 return (rc);
1962 }
1963
1964 void
rfs4_update_lease(rfs4_client_t * cp)1965 rfs4_update_lease(rfs4_client_t *cp)
1966 {
1967 rfs4_dbe_lock(cp->rc_dbe);
1968 if (!cp->rc_forced_expire)
1969 cp->rc_last_access = gethrestime_sec();
1970 rfs4_dbe_unlock(cp->rc_dbe);
1971 }
1972
1973
1974 static bool_t
EQOPENOWNER(open_owner4 * a,open_owner4 * b)1975 EQOPENOWNER(open_owner4 *a, open_owner4 *b)
1976 {
1977 bool_t rc;
1978
1979 if (a->clientid != b->clientid)
1980 return (FALSE);
1981
1982 if (a->owner_len != b->owner_len)
1983 return (FALSE);
1984
1985 rc = (bcmp(a->owner_val, b->owner_val, a->owner_len) == 0);
1986
1987 return (rc);
1988 }
1989
1990 static uint_t
openowner_hash(void * key)1991 openowner_hash(void *key)
1992 {
1993 int i;
1994 open_owner4 *openowner = key;
1995 uint_t hash = 0;
1996
1997 for (i = 0; i < openowner->owner_len; i++) {
1998 hash <<= 4;
1999 hash += (uint_t)openowner->owner_val[i];
2000 }
2001 hash += (uint_t)openowner->clientid;
2002 hash |= (openowner->clientid >> 32);
2003
2004 return (hash);
2005 }
2006
2007 static bool_t
openowner_compare(rfs4_entry_t u_entry,void * key)2008 openowner_compare(rfs4_entry_t u_entry, void *key)
2009 {
2010 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2011 open_owner4 *arg = key;
2012
2013 return (EQOPENOWNER(&oo->ro_owner, arg));
2014 }
2015
2016 void *
openowner_mkkey(rfs4_entry_t u_entry)2017 openowner_mkkey(rfs4_entry_t u_entry)
2018 {
2019 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2020
2021 return (&oo->ro_owner);
2022 }
2023
/*
 * Expiry callback for the open owner table.  Always reports the entry
 * as expired; the entry itself is not examined.
 */
/* ARGSUSED */
static bool_t
rfs4_openowner_expiry(rfs4_entry_t u_entry)
{
	/* openstateid held us and did all needed delay */
	return (TRUE);
}
2031
2032 static void
rfs4_openowner_destroy(rfs4_entry_t u_entry)2033 rfs4_openowner_destroy(rfs4_entry_t u_entry)
2034 {
2035 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2036
2037 /* Remove open owner from client's lists of open owners */
2038 rfs4_dbe_lock(oo->ro_client->rc_dbe);
2039 list_remove(&oo->ro_client->rc_openownerlist, oo);
2040 rfs4_dbe_unlock(oo->ro_client->rc_dbe);
2041
2042 /* One less reference to the client */
2043 rfs4_client_rele(oo->ro_client);
2044 oo->ro_client = NULL;
2045
2046 /* Free the last reply for this lock owner */
2047 rfs4_free_reply(&oo->ro_reply);
2048
2049 if (oo->ro_reply_fh.nfs_fh4_val) {
2050 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2051 oo->ro_reply_fh.nfs_fh4_len);
2052 oo->ro_reply_fh.nfs_fh4_val = NULL;
2053 oo->ro_reply_fh.nfs_fh4_len = 0;
2054 }
2055
2056 rfs4_sw_destroy(&oo->ro_sw);
2057 list_destroy(&oo->ro_statelist);
2058
2059 /* Free the lock owner id */
2060 kmem_free(oo->ro_owner.owner_val, oo->ro_owner.owner_len);
2061 }
2062
/*
 * Release a reference on an open owner by releasing its database
 * entry (ro_dbe).
 */
void
rfs4_openowner_rele(rfs4_openowner_t *oo)
{
	rfs4_dbe_rele(oo->ro_dbe);
}
2068
2069 static bool_t
rfs4_openowner_create(rfs4_entry_t u_entry,void * arg)2070 rfs4_openowner_create(rfs4_entry_t u_entry, void *arg)
2071 {
2072 rfs4_openowner_t *oo = (rfs4_openowner_t *)u_entry;
2073 rfs4_openowner_t *argp = (rfs4_openowner_t *)arg;
2074 open_owner4 *openowner = &argp->ro_owner;
2075 seqid4 seqid = argp->ro_open_seqid;
2076 rfs4_client_t *cp;
2077 bool_t create = FALSE;
2078
2079 rw_enter(&rfs4_findclient_lock, RW_READER);
2080
2081 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2082 &openowner->clientid,
2083 &create, NULL, RFS4_DBS_VALID);
2084
2085 rw_exit(&rfs4_findclient_lock);
2086
2087 if (cp == NULL)
2088 return (FALSE);
2089
2090 oo->ro_reply_fh.nfs_fh4_len = 0;
2091 oo->ro_reply_fh.nfs_fh4_val = NULL;
2092
2093 oo->ro_owner.clientid = openowner->clientid;
2094 oo->ro_owner.owner_val =
2095 kmem_alloc(openowner->owner_len, KM_SLEEP);
2096
2097 bcopy(openowner->owner_val,
2098 oo->ro_owner.owner_val, openowner->owner_len);
2099
2100 oo->ro_owner.owner_len = openowner->owner_len;
2101
2102 oo->ro_need_confirm = TRUE;
2103
2104 rfs4_sw_init(&oo->ro_sw);
2105
2106 oo->ro_open_seqid = seqid;
2107 bzero(&oo->ro_reply, sizeof (nfs_resop4));
2108 oo->ro_client = cp;
2109 oo->ro_cr_set = NULL;
2110
2111 list_create(&oo->ro_statelist, sizeof (rfs4_state_t),
2112 offsetof(rfs4_state_t, rs_node));
2113
2114 /* Insert openowner into client's open owner list */
2115 rfs4_dbe_lock(cp->rc_dbe);
2116 list_insert_tail(&cp->rc_openownerlist, oo);
2117 rfs4_dbe_unlock(cp->rc_dbe);
2118
2119 return (TRUE);
2120 }
2121
2122 rfs4_openowner_t *
rfs4_findopenowner(open_owner4 * openowner,bool_t * create,seqid4 seqid)2123 rfs4_findopenowner(open_owner4 *openowner, bool_t *create, seqid4 seqid)
2124 {
2125 rfs4_openowner_t *oo;
2126 rfs4_openowner_t arg;
2127
2128 arg.ro_owner = *openowner;
2129 arg.ro_open_seqid = seqid;
2130 oo = (rfs4_openowner_t *)rfs4_dbsearch(rfs4_openowner_idx, openowner,
2131 create, &arg, RFS4_DBS_VALID);
2132
2133 return (oo);
2134 }
2135
2136 void
rfs4_update_open_sequence(rfs4_openowner_t * oo)2137 rfs4_update_open_sequence(rfs4_openowner_t *oo)
2138 {
2139
2140 rfs4_dbe_lock(oo->ro_dbe);
2141
2142 oo->ro_open_seqid++;
2143
2144 rfs4_dbe_unlock(oo->ro_dbe);
2145 }
2146
2147 void
rfs4_update_open_resp(rfs4_openowner_t * oo,nfs_resop4 * resp,nfs_fh4 * fh)2148 rfs4_update_open_resp(rfs4_openowner_t *oo, nfs_resop4 *resp, nfs_fh4 *fh)
2149 {
2150
2151 rfs4_dbe_lock(oo->ro_dbe);
2152
2153 rfs4_free_reply(&oo->ro_reply);
2154
2155 rfs4_copy_reply(&oo->ro_reply, resp);
2156
2157 /* Save the filehandle if provided and free if not used */
2158 if (resp->nfs_resop4_u.opopen.status == NFS4_OK &&
2159 fh && fh->nfs_fh4_len) {
2160 if (oo->ro_reply_fh.nfs_fh4_val == NULL)
2161 oo->ro_reply_fh.nfs_fh4_val =
2162 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2163 nfs_fh4_copy(fh, &oo->ro_reply_fh);
2164 } else {
2165 if (oo->ro_reply_fh.nfs_fh4_val) {
2166 kmem_free(oo->ro_reply_fh.nfs_fh4_val,
2167 oo->ro_reply_fh.nfs_fh4_len);
2168 oo->ro_reply_fh.nfs_fh4_val = NULL;
2169 oo->ro_reply_fh.nfs_fh4_len = 0;
2170 }
2171 }
2172
2173 rfs4_dbe_unlock(oo->ro_dbe);
2174 }
2175
2176 static bool_t
lockowner_compare(rfs4_entry_t u_entry,void * key)2177 lockowner_compare(rfs4_entry_t u_entry, void *key)
2178 {
2179 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2180 lock_owner4 *b = (lock_owner4 *)key;
2181
2182 if (lo->rl_owner.clientid != b->clientid)
2183 return (FALSE);
2184
2185 if (lo->rl_owner.owner_len != b->owner_len)
2186 return (FALSE);
2187
2188 return (bcmp(lo->rl_owner.owner_val, b->owner_val,
2189 lo->rl_owner.owner_len) == 0);
2190 }
2191
2192 void *
lockowner_mkkey(rfs4_entry_t u_entry)2193 lockowner_mkkey(rfs4_entry_t u_entry)
2194 {
2195 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2196
2197 return (&lo->rl_owner);
2198 }
2199
2200 static uint32_t
lockowner_hash(void * key)2201 lockowner_hash(void *key)
2202 {
2203 int i;
2204 lock_owner4 *lockowner = key;
2205 uint_t hash = 0;
2206
2207 for (i = 0; i < lockowner->owner_len; i++) {
2208 hash <<= 4;
2209 hash += (uint_t)lockowner->owner_val[i];
2210 }
2211 hash += (uint_t)lockowner->clientid;
2212 hash |= (lockowner->clientid >> 32);
2213
2214 return (hash);
2215 }
2216
/*
 * Hash callback for the pid index: the key is a pid stored directly in the
 * pointer value, so the hash is that value truncated to 32 bits.
 */
static uint32_t
pid_hash(void *key)
{
	uintptr_t raw = (uintptr_t)key;

	return ((uint32_t)raw);
}
2222
2223 static void *
pid_mkkey(rfs4_entry_t u_entry)2224 pid_mkkey(rfs4_entry_t u_entry)
2225 {
2226 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2227
2228 return ((void *)(uintptr_t)lo->rl_pid);
2229 }
2230
2231 static bool_t
pid_compare(rfs4_entry_t u_entry,void * key)2232 pid_compare(rfs4_entry_t u_entry, void *key)
2233 {
2234 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2235
2236 return (lo->rl_pid == (pid_t)(uintptr_t)key);
2237 }
2238
2239 static void
rfs4_lockowner_destroy(rfs4_entry_t u_entry)2240 rfs4_lockowner_destroy(rfs4_entry_t u_entry)
2241 {
2242 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2243
2244 /* Free the lock owner id */
2245 kmem_free(lo->rl_owner.owner_val, lo->rl_owner.owner_len);
2246 rfs4_client_rele(lo->rl_client);
2247 }
2248
2249 void
rfs4_lockowner_rele(rfs4_lockowner_t * lo)2250 rfs4_lockowner_rele(rfs4_lockowner_t *lo)
2251 {
2252 rfs4_dbe_rele(lo->rl_dbe);
2253 }
2254
2255 /* ARGSUSED */
2256 static bool_t
rfs4_lockowner_expiry(rfs4_entry_t u_entry)2257 rfs4_lockowner_expiry(rfs4_entry_t u_entry)
2258 {
2259 /*
2260 * Since expiry is called with no other references on
2261 * this struct, go ahead and have it removed.
2262 */
2263 return (TRUE);
2264 }
2265
2266 static bool_t
rfs4_lockowner_create(rfs4_entry_t u_entry,void * arg)2267 rfs4_lockowner_create(rfs4_entry_t u_entry, void *arg)
2268 {
2269 rfs4_lockowner_t *lo = (rfs4_lockowner_t *)u_entry;
2270 lock_owner4 *lockowner = (lock_owner4 *)arg;
2271 rfs4_client_t *cp;
2272 bool_t create = FALSE;
2273
2274 rw_enter(&rfs4_findclient_lock, RW_READER);
2275
2276 cp = (rfs4_client_t *)rfs4_dbsearch(rfs4_clientid_idx,
2277 &lockowner->clientid,
2278 &create, NULL, RFS4_DBS_VALID);
2279
2280 rw_exit(&rfs4_findclient_lock);
2281
2282 if (cp == NULL)
2283 return (FALSE);
2284
2285 /* Reference client */
2286 lo->rl_client = cp;
2287 lo->rl_owner.clientid = lockowner->clientid;
2288 lo->rl_owner.owner_val = kmem_alloc(lockowner->owner_len, KM_SLEEP);
2289 bcopy(lockowner->owner_val, lo->rl_owner.owner_val,
2290 lockowner->owner_len);
2291 lo->rl_owner.owner_len = lockowner->owner_len;
2292 lo->rl_pid = rfs4_dbe_getid(lo->rl_dbe);
2293
2294 return (TRUE);
2295 }
2296
2297 rfs4_lockowner_t *
rfs4_findlockowner(lock_owner4 * lockowner,bool_t * create)2298 rfs4_findlockowner(lock_owner4 *lockowner, bool_t *create)
2299 {
2300 rfs4_lockowner_t *lo;
2301
2302 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_idx, lockowner,
2303 create, lockowner, RFS4_DBS_VALID);
2304
2305 return (lo);
2306 }
2307
2308 rfs4_lockowner_t *
rfs4_findlockowner_by_pid(pid_t pid)2309 rfs4_findlockowner_by_pid(pid_t pid)
2310 {
2311 rfs4_lockowner_t *lo;
2312 bool_t create = FALSE;
2313
2314 lo = (rfs4_lockowner_t *)rfs4_dbsearch(rfs4_lockowner_pid_idx,
2315 (void *)(uintptr_t)pid, &create, NULL, RFS4_DBS_VALID);
2316
2317 return (lo);
2318 }
2319
2320
/*
 * Hash callback for the file index: ADDRHASH of the key pointer.
 */
static uint32_t
file_hash(void *key)
{
	uint32_t bucket = ADDRHASH(key);

	return (bucket);
}
2326
2327 static void *
file_mkkey(rfs4_entry_t u_entry)2328 file_mkkey(rfs4_entry_t u_entry)
2329 {
2330 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2331
2332 return (fp->rf_vp);
2333 }
2334
2335 static bool_t
file_compare(rfs4_entry_t u_entry,void * key)2336 file_compare(rfs4_entry_t u_entry, void *key)
2337 {
2338 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2339
2340 return (fp->rf_vp == (vnode_t *)key);
2341 }
2342
2343 static void
rfs4_file_destroy(rfs4_entry_t u_entry)2344 rfs4_file_destroy(rfs4_entry_t u_entry)
2345 {
2346 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2347
2348 list_destroy(&fp->rf_delegstatelist);
2349
2350 if (fp->rf_filehandle.nfs_fh4_val)
2351 kmem_free(fp->rf_filehandle.nfs_fh4_val,
2352 fp->rf_filehandle.nfs_fh4_len);
2353 cv_destroy(fp->rf_dinfo.rd_recall_cv);
2354 if (fp->rf_vp) {
2355 vnode_t *vp = fp->rf_vp;
2356
2357 mutex_enter(&vp->v_vsd_lock);
2358 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
2359 mutex_exit(&vp->v_vsd_lock);
2360 VN_RELE(vp);
2361 fp->rf_vp = NULL;
2362 }
2363 rw_destroy(&fp->rf_file_rwlock);
2364 }
2365
2366 /*
2367 * Used to unlock the underlying dbe struct only
2368 */
2369 void
rfs4_file_rele(rfs4_file_t * fp)2370 rfs4_file_rele(rfs4_file_t *fp)
2371 {
2372 rfs4_dbe_rele(fp->rf_dbe);
2373 }
2374
/*
 * Creation argument passed via rfs4_dbsearch() to rfs4_file_create().
 */
typedef struct {
	vnode_t *vp;	/* vnode the new file entry will hold and track */
	nfs_fh4 *fh;	/* filehandle copied into the new file entry */
} rfs4_fcreate_arg;
2379
2380 static bool_t
rfs4_file_create(rfs4_entry_t u_entry,void * arg)2381 rfs4_file_create(rfs4_entry_t u_entry, void *arg)
2382 {
2383 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
2384 rfs4_fcreate_arg *ap = (rfs4_fcreate_arg *)arg;
2385 vnode_t *vp = ap->vp;
2386 nfs_fh4 *fh = ap->fh;
2387
2388 VN_HOLD(vp);
2389
2390 fp->rf_filehandle.nfs_fh4_len = 0;
2391 fp->rf_filehandle.nfs_fh4_val = NULL;
2392 ASSERT(fh && fh->nfs_fh4_len);
2393 if (fh && fh->nfs_fh4_len) {
2394 fp->rf_filehandle.nfs_fh4_val =
2395 kmem_alloc(fh->nfs_fh4_len, KM_SLEEP);
2396 nfs_fh4_copy(fh, &fp->rf_filehandle);
2397 }
2398 fp->rf_vp = vp;
2399
2400 list_create(&fp->rf_delegstatelist, sizeof (rfs4_deleg_state_t),
2401 offsetof(rfs4_deleg_state_t, rds_node));
2402
2403 fp->rf_share_deny = fp->rf_share_access = fp->rf_access_read = 0;
2404 fp->rf_access_write = fp->rf_deny_read = fp->rf_deny_write = 0;
2405
2406 mutex_init(fp->rf_dinfo.rd_recall_lock, NULL, MUTEX_DEFAULT, NULL);
2407 cv_init(fp->rf_dinfo.rd_recall_cv, NULL, CV_DEFAULT, NULL);
2408
2409 fp->rf_dinfo.rd_dtype = OPEN_DELEGATE_NONE;
2410
2411 rw_init(&fp->rf_file_rwlock, NULL, RW_DEFAULT, NULL);
2412
2413 mutex_enter(&vp->v_vsd_lock);
2414 VERIFY(vsd_set(vp, nfs4_srv_vkey, (void *)fp) == 0);
2415 mutex_exit(&vp->v_vsd_lock);
2416
2417 return (TRUE);
2418 }
2419
2420 rfs4_file_t *
rfs4_findfile(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2421 rfs4_findfile(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2422 {
2423 rfs4_file_t *fp;
2424 rfs4_fcreate_arg arg;
2425
2426 arg.vp = vp;
2427 arg.fh = fh;
2428
2429 if (*create == TRUE)
2430 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2431 &arg, RFS4_DBS_VALID);
2432 else {
2433 mutex_enter(&vp->v_vsd_lock);
2434 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2435 if (fp) {
2436 rfs4_dbe_lock(fp->rf_dbe);
2437 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2438 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2439 rfs4_dbe_unlock(fp->rf_dbe);
2440 fp = NULL;
2441 } else {
2442 rfs4_dbe_hold(fp->rf_dbe);
2443 rfs4_dbe_unlock(fp->rf_dbe);
2444 }
2445 }
2446 mutex_exit(&vp->v_vsd_lock);
2447 }
2448 return (fp);
2449 }
2450
2451 /*
2452 * Find a file in the db and once it is located, take the rw lock.
2453 * Need to check the vnode pointer and if it does not exist (it was
2454 * removed between the db location and check) redo the find. This
2455 * assumes that a file struct that has a NULL vnode pointer is marked
2456 * at 'invalid' and will not be found in the db the second time
2457 * around.
2458 */
2459 rfs4_file_t *
rfs4_findfile_withlock(vnode_t * vp,nfs_fh4 * fh,bool_t * create)2460 rfs4_findfile_withlock(vnode_t *vp, nfs_fh4 *fh, bool_t *create)
2461 {
2462 rfs4_file_t *fp;
2463 rfs4_fcreate_arg arg;
2464 bool_t screate = *create;
2465
2466 if (screate == FALSE) {
2467 mutex_enter(&vp->v_vsd_lock);
2468 fp = (rfs4_file_t *)vsd_get(vp, nfs4_srv_vkey);
2469 if (fp) {
2470 rfs4_dbe_lock(fp->rf_dbe);
2471 if (rfs4_dbe_is_invalid(fp->rf_dbe) ||
2472 (rfs4_dbe_refcnt(fp->rf_dbe) == 0)) {
2473 rfs4_dbe_unlock(fp->rf_dbe);
2474 mutex_exit(&vp->v_vsd_lock);
2475 fp = NULL;
2476 } else {
2477 rfs4_dbe_hold(fp->rf_dbe);
2478 rfs4_dbe_unlock(fp->rf_dbe);
2479 mutex_exit(&vp->v_vsd_lock);
2480 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2481 if (fp->rf_vp == NULL) {
2482 rw_exit(&fp->rf_file_rwlock);
2483 rfs4_file_rele(fp);
2484 fp = NULL;
2485 }
2486 }
2487 } else {
2488 mutex_exit(&vp->v_vsd_lock);
2489 }
2490 } else {
2491 retry:
2492 arg.vp = vp;
2493 arg.fh = fh;
2494
2495 fp = (rfs4_file_t *)rfs4_dbsearch(rfs4_file_idx, vp, create,
2496 &arg, RFS4_DBS_VALID);
2497 if (fp != NULL) {
2498 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
2499 if (fp->rf_vp == NULL) {
2500 rw_exit(&fp->rf_file_rwlock);
2501 rfs4_file_rele(fp);
2502 *create = screate;
2503 goto retry;
2504 }
2505 }
2506 }
2507
2508 return (fp);
2509 }
2510
2511 static uint32_t
lo_state_hash(void * key)2512 lo_state_hash(void *key)
2513 {
2514 stateid_t *id = key;
2515
2516 return (id->bits.ident+id->bits.pid);
2517 }
2518
2519 static bool_t
lo_state_compare(rfs4_entry_t u_entry,void * key)2520 lo_state_compare(rfs4_entry_t u_entry, void *key)
2521 {
2522 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2523 stateid_t *id = key;
2524 bool_t rc;
2525
2526 rc = (lsp->rls_lockid.bits.boottime == id->bits.boottime &&
2527 lsp->rls_lockid.bits.type == id->bits.type &&
2528 lsp->rls_lockid.bits.ident == id->bits.ident &&
2529 lsp->rls_lockid.bits.pid == id->bits.pid);
2530
2531 return (rc);
2532 }
2533
2534 static void *
lo_state_mkkey(rfs4_entry_t u_entry)2535 lo_state_mkkey(rfs4_entry_t u_entry)
2536 {
2537 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2538
2539 return (&lsp->rls_lockid);
2540 }
2541
2542 static bool_t
rfs4_lo_state_expiry(rfs4_entry_t u_entry)2543 rfs4_lo_state_expiry(rfs4_entry_t u_entry)
2544 {
2545 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2546
2547 if (rfs4_dbe_is_invalid(lsp->rls_dbe))
2548 return (TRUE);
2549 if (lsp->rls_state->rs_closed)
2550 return (TRUE);
2551 return ((gethrestime_sec() -
2552 lsp->rls_state->rs_owner->ro_client->rc_last_access
2553 > rfs4_lease_time));
2554 }
2555
/*
 * Destroy callback for lock state entries.
 *
 * Unlinks the entry from its open state's rs_lostatelist, tears down the
 * sequencing state, removes any file locks that have not been cleaned yet,
 * frees the cached reply, and finally drops the references on the lock
 * owner and the open state that rfs4_lo_state_create() took.
 */
static void
rfs4_lo_state_destroy(rfs4_entry_t u_entry)
{
	rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;

	/* The open state's list is protected by the open state's dbe lock */
	rfs4_dbe_lock(lsp->rls_state->rs_dbe);
	list_remove(&lsp->rls_state->rs_lostatelist, lsp);
	rfs4_dbe_unlock(lsp->rls_state->rs_dbe);

	rfs4_sw_destroy(&lsp->rls_sw);

	/* Make sure to release the file locks */
	if (lsp->rls_locks_cleaned == FALSE) {
		lsp->rls_locks_cleaned = TRUE;
		/* Nothing to clean when the client has no sysid assigned */
		if (lsp->rls_locker->rl_client->rc_sysidt != LM_NOSYSID) {
			/* Is the PxFS kernel module loaded? */
			if (lm_remove_file_locks != NULL) {
				int new_sysid;

				/* Encode the cluster nodeid in new sysid */
				new_sysid =
				    lsp->rls_locker->rl_client->rc_sysidt;
				lm_set_nlmid_flk(&new_sysid);

				/*
				 * This PxFS routine removes file locks for a
				 * client over all nodes of a cluster.
				 */
				DTRACE_PROBE1(nfss_i_clust_rm_lck,
				    int, new_sysid);
				(*lm_remove_file_locks)(new_sysid);
			} else {
				/* Local case: clean by vnode, pid and sysid */
				(void) cleanlocks(
				    lsp->rls_state->rs_finfo->rf_vp,
				    lsp->rls_locker->rl_pid,
				    lsp->rls_locker->rl_client->rc_sysidt);
			}
		}
	}

	/* Free the last reply for this state */
	rfs4_free_reply(&lsp->rls_reply);

	rfs4_lockowner_rele(lsp->rls_locker);
	lsp->rls_locker = NULL;

	/* Release the open state without touching the file rwlock */
	rfs4_state_rele_nounlock(lsp->rls_state);
	lsp->rls_state = NULL;
}
2605
2606 static bool_t
rfs4_lo_state_create(rfs4_entry_t u_entry,void * arg)2607 rfs4_lo_state_create(rfs4_entry_t u_entry, void *arg)
2608 {
2609 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2610 rfs4_lo_state_t *argp = (rfs4_lo_state_t *)arg;
2611 rfs4_lockowner_t *lo = argp->rls_locker;
2612 rfs4_state_t *sp = argp->rls_state;
2613
2614 lsp->rls_state = sp;
2615
2616 lsp->rls_lockid = sp->rs_stateid;
2617 lsp->rls_lockid.bits.type = LOCKID;
2618 lsp->rls_lockid.bits.chgseq = 0;
2619 lsp->rls_lockid.bits.pid = lo->rl_pid;
2620
2621 lsp->rls_locks_cleaned = FALSE;
2622 lsp->rls_lock_completed = FALSE;
2623
2624 rfs4_sw_init(&lsp->rls_sw);
2625
2626 /* Attached the supplied lock owner */
2627 rfs4_dbe_hold(lo->rl_dbe);
2628 lsp->rls_locker = lo;
2629
2630 rfs4_dbe_lock(sp->rs_dbe);
2631 list_insert_tail(&sp->rs_lostatelist, lsp);
2632 rfs4_dbe_hold(sp->rs_dbe);
2633 rfs4_dbe_unlock(sp->rs_dbe);
2634
2635 return (TRUE);
2636 }
2637
2638 void
rfs4_lo_state_rele(rfs4_lo_state_t * lsp,bool_t unlock_fp)2639 rfs4_lo_state_rele(rfs4_lo_state_t *lsp, bool_t unlock_fp)
2640 {
2641 if (unlock_fp == TRUE)
2642 rw_exit(&lsp->rls_state->rs_finfo->rf_file_rwlock);
2643 rfs4_dbe_rele(lsp->rls_dbe);
2644 }
2645
2646 static rfs4_lo_state_t *
rfs4_findlo_state(stateid_t * id,bool_t lock_fp)2647 rfs4_findlo_state(stateid_t *id, bool_t lock_fp)
2648 {
2649 rfs4_lo_state_t *lsp;
2650 bool_t create = FALSE;
2651
2652 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_idx, id,
2653 &create, NULL, RFS4_DBS_VALID);
2654 if (lock_fp == TRUE && lsp != NULL)
2655 rw_enter(&lsp->rls_state->rs_finfo->rf_file_rwlock, RW_READER);
2656
2657 return (lsp);
2658 }
2659
2660
2661 static uint32_t
lo_state_lo_hash(void * key)2662 lo_state_lo_hash(void *key)
2663 {
2664 rfs4_lo_state_t *lsp = key;
2665
2666 return (ADDRHASH(lsp->rls_locker) ^ ADDRHASH(lsp->rls_state));
2667 }
2668
2669 static bool_t
lo_state_lo_compare(rfs4_entry_t u_entry,void * key)2670 lo_state_lo_compare(rfs4_entry_t u_entry, void *key)
2671 {
2672 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
2673 rfs4_lo_state_t *keyp = key;
2674
2675 return (keyp->rls_locker == lsp->rls_locker &&
2676 keyp->rls_state == lsp->rls_state);
2677 }
2678
2679 static void *
lo_state_lo_mkkey(rfs4_entry_t u_entry)2680 lo_state_lo_mkkey(rfs4_entry_t u_entry)
2681 {
2682 return (u_entry);
2683 }
2684
2685 rfs4_lo_state_t *
rfs4_findlo_state_by_owner(rfs4_lockowner_t * lo,rfs4_state_t * sp,bool_t * create)2686 rfs4_findlo_state_by_owner(rfs4_lockowner_t *lo, rfs4_state_t *sp,
2687 bool_t *create)
2688 {
2689 rfs4_lo_state_t *lsp;
2690 rfs4_lo_state_t arg;
2691
2692 arg.rls_locker = lo;
2693 arg.rls_state = sp;
2694
2695 lsp = (rfs4_lo_state_t *)rfs4_dbsearch(rfs4_lo_state_owner_idx, &arg,
2696 create, &arg, RFS4_DBS_VALID);
2697
2698 return (lsp);
2699 }
2700
2701 static stateid_t
get_stateid(id_t eid)2702 get_stateid(id_t eid)
2703 {
2704 stateid_t id;
2705
2706 id.bits.boottime = rfs4_start_time;
2707 id.bits.ident = eid;
2708 id.bits.chgseq = 0;
2709 id.bits.type = 0;
2710 id.bits.pid = 0;
2711
2712 /*
2713 * If we are booted as a cluster node, embed our nodeid.
2714 * We've already done sanity checks in rfs4_client_create() so no
2715 * need to repeat them here.
2716 */
2717 id.bits.clnodeid = (cluster_bootflags & CLUSTER_BOOTED) ?
2718 clconf_get_nodeid() : 0;
2719
2720 return (id);
2721 }
2722
2723 /*
2724 * For use only when booted as a cluster node.
2725 * Returns TRUE if the embedded nodeid indicates that this stateid was
2726 * generated on another node.
2727 */
2728 static int
foreign_stateid(stateid_t * id)2729 foreign_stateid(stateid_t *id)
2730 {
2731 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2732 return (id->bits.clnodeid != (uint32_t)clconf_get_nodeid());
2733 }
2734
2735 /*
2736 * For use only when booted as a cluster node.
2737 * Returns TRUE if the embedded nodeid indicates that this clientid was
2738 * generated on another node.
2739 */
2740 static int
foreign_clientid(cid * cidp)2741 foreign_clientid(cid *cidp)
2742 {
2743 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2744 return (cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT !=
2745 (uint32_t)clconf_get_nodeid());
2746 }
2747
2748 /*
2749 * For use only when booted as a cluster node.
2750 * Embed our cluster nodeid into the clientid.
2751 */
2752 static void
embed_nodeid(cid * cidp)2753 embed_nodeid(cid *cidp)
2754 {
2755 int clnodeid;
2756 /*
2757 * Currently, our state tables are small enough that their
2758 * ids will leave enough bits free for the nodeid. If the
2759 * tables become larger, we mustn't overwrite the id.
2760 * Equally, we only have room for so many bits of nodeid, so
2761 * must check that too.
2762 */
2763 ASSERT(cluster_bootflags & CLUSTER_BOOTED);
2764 ASSERT(cidp->impl_id.c_id >> CLUSTER_NODEID_SHIFT == 0);
2765 clnodeid = clconf_get_nodeid();
2766 ASSERT(clnodeid <= CLUSTER_MAX_NODEID);
2767 ASSERT(clnodeid != NODEID_UNKNOWN);
2768 cidp->impl_id.c_id |= (clnodeid << CLUSTER_NODEID_SHIFT);
2769 }
2770
2771 static uint32_t
state_hash(void * key)2772 state_hash(void *key)
2773 {
2774 stateid_t *ip = (stateid_t *)key;
2775
2776 return (ip->bits.ident);
2777 }
2778
2779 static bool_t
state_compare(rfs4_entry_t u_entry,void * key)2780 state_compare(rfs4_entry_t u_entry, void *key)
2781 {
2782 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2783 stateid_t *id = (stateid_t *)key;
2784 bool_t rc;
2785
2786 rc = (sp->rs_stateid.bits.boottime == id->bits.boottime &&
2787 sp->rs_stateid.bits.ident == id->bits.ident);
2788
2789 return (rc);
2790 }
2791
2792 static void *
state_mkkey(rfs4_entry_t u_entry)2793 state_mkkey(rfs4_entry_t u_entry)
2794 {
2795 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2796
2797 return (&sp->rs_stateid);
2798 }
2799
2800 static void
rfs4_state_destroy(rfs4_entry_t u_entry)2801 rfs4_state_destroy(rfs4_entry_t u_entry)
2802 {
2803 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
2804
2805 /* remove from openowner list */
2806 rfs4_dbe_lock(sp->rs_owner->ro_dbe);
2807 list_remove(&sp->rs_owner->ro_statelist, sp);
2808 rfs4_dbe_unlock(sp->rs_owner->ro_dbe);
2809
2810 list_destroy(&sp->rs_lostatelist);
2811
2812 /* release any share locks for this stateid if it's still open */
2813 if (!sp->rs_closed) {
2814 rfs4_dbe_lock(sp->rs_dbe);
2815 (void) rfs4_unshare(sp);
2816 rfs4_dbe_unlock(sp->rs_dbe);
2817 }
2818
2819 /* Were done with the file */
2820 rfs4_file_rele(sp->rs_finfo);
2821 sp->rs_finfo = NULL;
2822
2823 /* And now with the openowner */
2824 rfs4_openowner_rele(sp->rs_owner);
2825 sp->rs_owner = NULL;
2826 }
2827
2828 static void
rfs4_state_rele_nounlock(rfs4_state_t * sp)2829 rfs4_state_rele_nounlock(rfs4_state_t *sp)
2830 {
2831 rfs4_dbe_rele(sp->rs_dbe);
2832 }
2833
2834 void
rfs4_state_rele(rfs4_state_t * sp)2835 rfs4_state_rele(rfs4_state_t *sp)
2836 {
2837 rw_exit(&sp->rs_finfo->rf_file_rwlock);
2838 rfs4_dbe_rele(sp->rs_dbe);
2839 }
2840
2841 static uint32_t
deleg_hash(void * key)2842 deleg_hash(void *key)
2843 {
2844 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)key;
2845
2846 return (ADDRHASH(dsp->rds_client) ^ ADDRHASH(dsp->rds_finfo));
2847 }
2848
2849 static bool_t
deleg_compare(rfs4_entry_t u_entry,void * key)2850 deleg_compare(rfs4_entry_t u_entry, void *key)
2851 {
2852 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2853 rfs4_deleg_state_t *kdsp = (rfs4_deleg_state_t *)key;
2854
2855 return (dsp->rds_client == kdsp->rds_client &&
2856 dsp->rds_finfo == kdsp->rds_finfo);
2857 }
2858
2859 static void *
deleg_mkkey(rfs4_entry_t u_entry)2860 deleg_mkkey(rfs4_entry_t u_entry)
2861 {
2862 return (u_entry);
2863 }
2864
2865 static uint32_t
deleg_state_hash(void * key)2866 deleg_state_hash(void *key)
2867 {
2868 stateid_t *ip = (stateid_t *)key;
2869
2870 return (ip->bits.ident);
2871 }
2872
2873 static bool_t
deleg_state_compare(rfs4_entry_t u_entry,void * key)2874 deleg_state_compare(rfs4_entry_t u_entry, void *key)
2875 {
2876 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2877 stateid_t *id = (stateid_t *)key;
2878 bool_t rc;
2879
2880 if (id->bits.type != DELEGID)
2881 return (FALSE);
2882
2883 rc = (dsp->rds_delegid.bits.boottime == id->bits.boottime &&
2884 dsp->rds_delegid.bits.ident == id->bits.ident);
2885
2886 return (rc);
2887 }
2888
2889 static void *
deleg_state_mkkey(rfs4_entry_t u_entry)2890 deleg_state_mkkey(rfs4_entry_t u_entry)
2891 {
2892 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2893
2894 return (&dsp->rds_delegid);
2895 }
2896
2897 static bool_t
rfs4_deleg_state_expiry(rfs4_entry_t u_entry)2898 rfs4_deleg_state_expiry(rfs4_entry_t u_entry)
2899 {
2900 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2901
2902 if (rfs4_dbe_is_invalid(dsp->rds_dbe))
2903 return (TRUE);
2904
2905 if (dsp->rds_dtype == OPEN_DELEGATE_NONE)
2906 return (TRUE);
2907
2908 if ((gethrestime_sec() - dsp->rds_client->rc_last_access
2909 > rfs4_lease_time)) {
2910 rfs4_dbe_invalidate(dsp->rds_dbe);
2911 return (TRUE);
2912 }
2913
2914 return (FALSE);
2915 }
2916
2917 static bool_t
rfs4_deleg_state_create(rfs4_entry_t u_entry,void * argp)2918 rfs4_deleg_state_create(rfs4_entry_t u_entry, void *argp)
2919 {
2920 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2921 rfs4_file_t *fp = ((rfs4_deleg_state_t *)argp)->rds_finfo;
2922 rfs4_client_t *cp = ((rfs4_deleg_state_t *)argp)->rds_client;
2923
2924 rfs4_dbe_hold(fp->rf_dbe);
2925 rfs4_dbe_hold(cp->rc_dbe);
2926
2927 dsp->rds_delegid = get_stateid(rfs4_dbe_getid(dsp->rds_dbe));
2928 dsp->rds_delegid.bits.type = DELEGID;
2929 dsp->rds_finfo = fp;
2930 dsp->rds_client = cp;
2931 dsp->rds_dtype = OPEN_DELEGATE_NONE;
2932
2933 dsp->rds_time_granted = gethrestime_sec(); /* observability */
2934 dsp->rds_time_revoked = 0;
2935
2936 list_link_init(&dsp->rds_node);
2937
2938 return (TRUE);
2939 }
2940
2941 static void
rfs4_deleg_state_destroy(rfs4_entry_t u_entry)2942 rfs4_deleg_state_destroy(rfs4_entry_t u_entry)
2943 {
2944 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
2945
2946 /* return delegation if necessary */
2947 rfs4_return_deleg(dsp, FALSE);
2948
2949 /* Were done with the file */
2950 rfs4_file_rele(dsp->rds_finfo);
2951 dsp->rds_finfo = NULL;
2952
2953 /* And now with the openowner */
2954 rfs4_client_rele(dsp->rds_client);
2955 dsp->rds_client = NULL;
2956 }
2957
2958 rfs4_deleg_state_t *
rfs4_finddeleg(rfs4_state_t * sp,bool_t * create)2959 rfs4_finddeleg(rfs4_state_t *sp, bool_t *create)
2960 {
2961 rfs4_deleg_state_t ds, *dsp;
2962
2963 ds.rds_client = sp->rs_owner->ro_client;
2964 ds.rds_finfo = sp->rs_finfo;
2965
2966 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_idx, &ds,
2967 create, &ds, RFS4_DBS_VALID);
2968
2969 return (dsp);
2970 }
2971
2972 rfs4_deleg_state_t *
rfs4_finddelegstate(stateid_t * id)2973 rfs4_finddelegstate(stateid_t *id)
2974 {
2975 rfs4_deleg_state_t *dsp;
2976 bool_t create = FALSE;
2977
2978 dsp = (rfs4_deleg_state_t *)rfs4_dbsearch(rfs4_deleg_state_idx, id,
2979 &create, NULL, RFS4_DBS_VALID);
2980
2981 return (dsp);
2982 }
2983
2984 void
rfs4_deleg_state_rele(rfs4_deleg_state_t * dsp)2985 rfs4_deleg_state_rele(rfs4_deleg_state_t *dsp)
2986 {
2987 rfs4_dbe_rele(dsp->rds_dbe);
2988 }
2989
2990 void
rfs4_update_lock_sequence(rfs4_lo_state_t * lsp)2991 rfs4_update_lock_sequence(rfs4_lo_state_t *lsp)
2992 {
2993
2994 rfs4_dbe_lock(lsp->rls_dbe);
2995
2996 /*
2997 * If we are skipping sequence id checking, this means that
2998 * this is the first lock request and therefore the sequence
2999 * id does not need to be updated. This only happens on the
3000 * first lock request for a lockowner
3001 */
3002 if (!lsp->rls_skip_seqid_check)
3003 lsp->rls_seqid++;
3004
3005 rfs4_dbe_unlock(lsp->rls_dbe);
3006 }
3007
3008 void
rfs4_update_lock_resp(rfs4_lo_state_t * lsp,nfs_resop4 * resp)3009 rfs4_update_lock_resp(rfs4_lo_state_t *lsp, nfs_resop4 *resp)
3010 {
3011
3012 rfs4_dbe_lock(lsp->rls_dbe);
3013
3014 rfs4_free_reply(&lsp->rls_reply);
3015
3016 rfs4_copy_reply(&lsp->rls_reply, resp);
3017
3018 rfs4_dbe_unlock(lsp->rls_dbe);
3019 }
3020
3021 void
rfs4_free_opens(rfs4_openowner_t * oo,bool_t invalidate,bool_t close_of_client)3022 rfs4_free_opens(rfs4_openowner_t *oo, bool_t invalidate,
3023 bool_t close_of_client)
3024 {
3025 rfs4_state_t *sp;
3026
3027 rfs4_dbe_lock(oo->ro_dbe);
3028
3029 for (sp = list_head(&oo->ro_statelist); sp != NULL;
3030 sp = list_next(&oo->ro_statelist, sp)) {
3031 rfs4_state_close(sp, FALSE, close_of_client, CRED());
3032 if (invalidate == TRUE)
3033 rfs4_dbe_invalidate(sp->rs_dbe);
3034 }
3035
3036 rfs4_dbe_invalidate(oo->ro_dbe);
3037 rfs4_dbe_unlock(oo->ro_dbe);
3038 }
3039
3040 static uint32_t
state_owner_file_hash(void * key)3041 state_owner_file_hash(void *key)
3042 {
3043 rfs4_state_t *sp = key;
3044
3045 return (ADDRHASH(sp->rs_owner) ^ ADDRHASH(sp->rs_finfo));
3046 }
3047
3048 static bool_t
state_owner_file_compare(rfs4_entry_t u_entry,void * key)3049 state_owner_file_compare(rfs4_entry_t u_entry, void *key)
3050 {
3051 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3052 rfs4_state_t *arg = key;
3053
3054 if (sp->rs_closed == TRUE)
3055 return (FALSE);
3056
3057 return (arg->rs_owner == sp->rs_owner && arg->rs_finfo == sp->rs_finfo);
3058 }
3059
3060 static void *
state_owner_file_mkkey(rfs4_entry_t u_entry)3061 state_owner_file_mkkey(rfs4_entry_t u_entry)
3062 {
3063 return (u_entry);
3064 }
3065
/*
 * Hash callback for the state-by-file index: ADDRHASH of the key pointer.
 */
static uint32_t
state_file_hash(void *key)
{
	uint32_t bucket = ADDRHASH(key);

	return (bucket);
}
3071
3072 static bool_t
state_file_compare(rfs4_entry_t u_entry,void * key)3073 state_file_compare(rfs4_entry_t u_entry, void *key)
3074 {
3075 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3076 rfs4_file_t *fp = key;
3077
3078 if (sp->rs_closed == TRUE)
3079 return (FALSE);
3080
3081 return (fp == sp->rs_finfo);
3082 }
3083
3084 static void *
state_file_mkkey(rfs4_entry_t u_entry)3085 state_file_mkkey(rfs4_entry_t u_entry)
3086 {
3087 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3088
3089 return (sp->rs_finfo);
3090 }
3091
3092 rfs4_state_t *
rfs4_findstate_by_owner_file(rfs4_openowner_t * oo,rfs4_file_t * fp,bool_t * create)3093 rfs4_findstate_by_owner_file(rfs4_openowner_t *oo, rfs4_file_t *fp,
3094 bool_t *create)
3095 {
3096 rfs4_state_t *sp;
3097 rfs4_state_t key;
3098
3099 key.rs_owner = oo;
3100 key.rs_finfo = fp;
3101
3102 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_owner_file_idx, &key,
3103 create, &key, RFS4_DBS_VALID);
3104
3105 return (sp);
3106 }
3107
3108 /* This returns ANY state struct that refers to this file */
3109 static rfs4_state_t *
rfs4_findstate_by_file(rfs4_file_t * fp)3110 rfs4_findstate_by_file(rfs4_file_t *fp)
3111 {
3112 bool_t create = FALSE;
3113
3114 return ((rfs4_state_t *)rfs4_dbsearch(rfs4_state_file_idx, fp,
3115 &create, fp, RFS4_DBS_VALID));
3116 }
3117
3118 static bool_t
rfs4_state_expiry(rfs4_entry_t u_entry)3119 rfs4_state_expiry(rfs4_entry_t u_entry)
3120 {
3121 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3122
3123 if (rfs4_dbe_is_invalid(sp->rs_dbe))
3124 return (TRUE);
3125
3126 if (sp->rs_closed == TRUE &&
3127 ((gethrestime_sec() - rfs4_dbe_get_timerele(sp->rs_dbe))
3128 > rfs4_lease_time))
3129 return (TRUE);
3130
3131 return ((gethrestime_sec() - sp->rs_owner->ro_client->rc_last_access
3132 > rfs4_lease_time));
3133 }
3134
3135 static bool_t
rfs4_state_create(rfs4_entry_t u_entry,void * argp)3136 rfs4_state_create(rfs4_entry_t u_entry, void *argp)
3137 {
3138 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3139 rfs4_file_t *fp = ((rfs4_state_t *)argp)->rs_finfo;
3140 rfs4_openowner_t *oo = ((rfs4_state_t *)argp)->rs_owner;
3141
3142 rfs4_dbe_hold(fp->rf_dbe);
3143 rfs4_dbe_hold(oo->ro_dbe);
3144 sp->rs_stateid = get_stateid(rfs4_dbe_getid(sp->rs_dbe));
3145 sp->rs_stateid.bits.type = OPENID;
3146 sp->rs_owner = oo;
3147 sp->rs_finfo = fp;
3148
3149 list_create(&sp->rs_lostatelist, sizeof (rfs4_lo_state_t),
3150 offsetof(rfs4_lo_state_t, rls_node));
3151
3152 /* Insert state on per open owner's list */
3153 rfs4_dbe_lock(oo->ro_dbe);
3154 list_insert_tail(&oo->ro_statelist, sp);
3155 rfs4_dbe_unlock(oo->ro_dbe);
3156
3157 return (TRUE);
3158 }
3159
3160 static rfs4_state_t *
rfs4_findstate(stateid_t * id,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3161 rfs4_findstate(stateid_t *id, rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3162 {
3163 rfs4_state_t *sp;
3164 bool_t create = FALSE;
3165
3166 sp = (rfs4_state_t *)rfs4_dbsearch(rfs4_state_idx, id,
3167 &create, NULL, find_invalid);
3168 if (lock_fp == TRUE && sp != NULL)
3169 rw_enter(&sp->rs_finfo->rf_file_rwlock, RW_READER);
3170
3171 return (sp);
3172 }
3173
3174 void
rfs4_state_close(rfs4_state_t * sp,bool_t lock_held,bool_t close_of_client,cred_t * cr)3175 rfs4_state_close(rfs4_state_t *sp, bool_t lock_held, bool_t close_of_client,
3176 cred_t *cr)
3177 {
3178 /* Remove the associated lo_state owners */
3179 if (!lock_held)
3180 rfs4_dbe_lock(sp->rs_dbe);
3181
3182 /*
3183 * If refcnt == 0, the dbe is about to be destroyed.
3184 * lock state will be released by the reaper thread.
3185 */
3186
3187 if (rfs4_dbe_refcnt(sp->rs_dbe) > 0) {
3188 if (sp->rs_closed == FALSE) {
3189 rfs4_release_share_lock_state(sp, cr, close_of_client);
3190 sp->rs_closed = TRUE;
3191 }
3192 }
3193
3194 if (!lock_held)
3195 rfs4_dbe_unlock(sp->rs_dbe);
3196 }
3197
3198 /*
3199 * Remove all state associated with the given client.
3200 */
3201 void
rfs4_client_state_remove(rfs4_client_t * cp)3202 rfs4_client_state_remove(rfs4_client_t *cp)
3203 {
3204 rfs4_openowner_t *oo;
3205
3206 rfs4_dbe_lock(cp->rc_dbe);
3207
3208 for (oo = list_head(&cp->rc_openownerlist); oo != NULL;
3209 oo = list_next(&cp->rc_openownerlist, oo)) {
3210 rfs4_free_opens(oo, TRUE, TRUE);
3211 }
3212
3213 rfs4_dbe_unlock(cp->rc_dbe);
3214 }
3215
3216 void
rfs4_client_close(rfs4_client_t * cp)3217 rfs4_client_close(rfs4_client_t *cp)
3218 {
3219 /* Mark client as going away. */
3220 rfs4_dbe_lock(cp->rc_dbe);
3221 rfs4_dbe_invalidate(cp->rc_dbe);
3222 rfs4_dbe_unlock(cp->rc_dbe);
3223
3224 rfs4_client_state_remove(cp);
3225
3226 /* Release the client */
3227 rfs4_client_rele(cp);
3228 }
3229
3230 nfsstat4
rfs4_check_clientid(clientid4 * cp,int setclid_confirm)3231 rfs4_check_clientid(clientid4 *cp, int setclid_confirm)
3232 {
3233 cid *cidp = (cid *) cp;
3234
3235 /*
3236 * If we are booted as a cluster node, check the embedded nodeid.
3237 * If it indicates that this clientid was generated on another node,
3238 * inform the client accordingly.
3239 */
3240 if (cluster_bootflags & CLUSTER_BOOTED && foreign_clientid(cidp))
3241 return (NFS4ERR_STALE_CLIENTID);
3242
3243 /*
3244 * If the server start time matches the time provided
3245 * by the client (via the clientid) and this is NOT a
3246 * setclientid_confirm then return EXPIRED.
3247 */
3248 if (!setclid_confirm && cidp->impl_id.start_time == rfs4_start_time)
3249 return (NFS4ERR_EXPIRED);
3250
3251 return (NFS4ERR_STALE_CLIENTID);
3252 }
3253
3254 /*
3255 * This is used when a stateid has not been found amongst the
3256 * current server's state. Check the stateid to see if it
3257 * was from this server instantiation or not.
3258 */
3259 static nfsstat4
what_stateid_error(stateid_t * id,stateid_type_t type)3260 what_stateid_error(stateid_t *id, stateid_type_t type)
3261 {
3262 /* If we are booted as a cluster node, was stateid locally generated? */
3263 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3264 return (NFS4ERR_STALE_STATEID);
3265
3266 /* If types don't match then no use checking further */
3267 if (type != id->bits.type)
3268 return (NFS4ERR_BAD_STATEID);
3269
3270 /* From a different server instantiation, return STALE */
3271 if (id->bits.boottime != rfs4_start_time)
3272 return (NFS4ERR_STALE_STATEID);
3273
3274 /*
3275 * From this server but the state is most likely beyond lease
3276 * timeout: return NFS4ERR_EXPIRED. However, there is the
3277 * case of a delegation stateid. For delegations, there is a
3278 * case where the state can be removed without the client's
3279 * knowledge/consent: revocation. In the case of delegation
3280 * revocation, the delegation state will be removed and will
3281 * not be found. If the client does something like a
3282 * DELEGRETURN or even a READ/WRITE with a delegatoin stateid
3283 * that has been revoked, the server should return BAD_STATEID
3284 * instead of the more common EXPIRED error.
3285 */
3286 if (id->bits.boottime == rfs4_start_time) {
3287 if (type == DELEGID)
3288 return (NFS4ERR_BAD_STATEID);
3289 else
3290 return (NFS4ERR_EXPIRED);
3291 }
3292
3293 return (NFS4ERR_BAD_STATEID);
3294 }
3295
3296 /*
3297 * Used later on to find the various state structs. When called from
3298 * rfs4_check_stateid()->rfs4_get_all_state(), no file struct lock is
3299 * taken (it is not needed) and helps on the read/write path with
3300 * respect to performance.
3301 */
3302 static nfsstat4
rfs4_get_state_lockit(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid,bool_t lock_fp)3303 rfs4_get_state_lockit(stateid4 *stateid, rfs4_state_t **spp,
3304 rfs4_dbsearch_type_t find_invalid, bool_t lock_fp)
3305 {
3306 stateid_t *id = (stateid_t *)stateid;
3307 rfs4_state_t *sp;
3308
3309 *spp = NULL;
3310
3311 /* If we are booted as a cluster node, was stateid locally generated? */
3312 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3313 return (NFS4ERR_STALE_STATEID);
3314
3315 sp = rfs4_findstate(id, find_invalid, lock_fp);
3316 if (sp == NULL) {
3317 return (what_stateid_error(id, OPENID));
3318 }
3319
3320 if (rfs4_lease_expired(sp->rs_owner->ro_client)) {
3321 if (lock_fp == TRUE)
3322 rfs4_state_rele(sp);
3323 else
3324 rfs4_state_rele_nounlock(sp);
3325 return (NFS4ERR_EXPIRED);
3326 }
3327
3328 *spp = sp;
3329
3330 return (NFS4_OK);
3331 }
3332
3333 nfsstat4
rfs4_get_state(stateid4 * stateid,rfs4_state_t ** spp,rfs4_dbsearch_type_t find_invalid)3334 rfs4_get_state(stateid4 *stateid, rfs4_state_t **spp,
3335 rfs4_dbsearch_type_t find_invalid)
3336 {
3337 return (rfs4_get_state_lockit(stateid, spp, find_invalid, TRUE));
3338 }
3339
3340 int
rfs4_check_stateid_seqid(rfs4_state_t * sp,stateid4 * stateid)3341 rfs4_check_stateid_seqid(rfs4_state_t *sp, stateid4 *stateid)
3342 {
3343 stateid_t *id = (stateid_t *)stateid;
3344
3345 if (rfs4_lease_expired(sp->rs_owner->ro_client))
3346 return (NFS4_CHECK_STATEID_EXPIRED);
3347
3348 /* Stateid is some time in the future - that's bad */
3349 if (sp->rs_stateid.bits.chgseq < id->bits.chgseq)
3350 return (NFS4_CHECK_STATEID_BAD);
3351
3352 if (sp->rs_stateid.bits.chgseq == id->bits.chgseq + 1)
3353 return (NFS4_CHECK_STATEID_REPLAY);
3354
3355 /* Stateid is some time in the past - that's old */
3356 if (sp->rs_stateid.bits.chgseq > id->bits.chgseq)
3357 return (NFS4_CHECK_STATEID_OLD);
3358
3359 /* Caller needs to know about confirmation before closure */
3360 if (sp->rs_owner->ro_need_confirm)
3361 return (NFS4_CHECK_STATEID_UNCONFIRMED);
3362
3363 if (sp->rs_closed == TRUE)
3364 return (NFS4_CHECK_STATEID_CLOSED);
3365
3366 return (NFS4_CHECK_STATEID_OKAY);
3367 }
3368
3369 int
rfs4_check_lo_stateid_seqid(rfs4_lo_state_t * lsp,stateid4 * stateid)3370 rfs4_check_lo_stateid_seqid(rfs4_lo_state_t *lsp, stateid4 *stateid)
3371 {
3372 stateid_t *id = (stateid_t *)stateid;
3373
3374 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client))
3375 return (NFS4_CHECK_STATEID_EXPIRED);
3376
3377 /* Stateid is some time in the future - that's bad */
3378 if (lsp->rls_lockid.bits.chgseq < id->bits.chgseq)
3379 return (NFS4_CHECK_STATEID_BAD);
3380
3381 if (lsp->rls_lockid.bits.chgseq == id->bits.chgseq + 1)
3382 return (NFS4_CHECK_STATEID_REPLAY);
3383
3384 /* Stateid is some time in the past - that's old */
3385 if (lsp->rls_lockid.bits.chgseq > id->bits.chgseq)
3386 return (NFS4_CHECK_STATEID_OLD);
3387
3388 if (lsp->rls_state->rs_closed == TRUE)
3389 return (NFS4_CHECK_STATEID_CLOSED);
3390
3391 return (NFS4_CHECK_STATEID_OKAY);
3392 }
3393
3394 nfsstat4
rfs4_get_deleg_state(stateid4 * stateid,rfs4_deleg_state_t ** dspp)3395 rfs4_get_deleg_state(stateid4 *stateid, rfs4_deleg_state_t **dspp)
3396 {
3397 stateid_t *id = (stateid_t *)stateid;
3398 rfs4_deleg_state_t *dsp;
3399
3400 *dspp = NULL;
3401
3402 /* If we are booted as a cluster node, was stateid locally generated? */
3403 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3404 return (NFS4ERR_STALE_STATEID);
3405
3406 dsp = rfs4_finddelegstate(id);
3407 if (dsp == NULL) {
3408 return (what_stateid_error(id, DELEGID));
3409 }
3410
3411 if (rfs4_lease_expired(dsp->rds_client)) {
3412 rfs4_deleg_state_rele(dsp);
3413 return (NFS4ERR_EXPIRED);
3414 }
3415
3416 *dspp = dsp;
3417
3418 return (NFS4_OK);
3419 }
3420
3421 nfsstat4
rfs4_get_lo_state(stateid4 * stateid,rfs4_lo_state_t ** lspp,bool_t lock_fp)3422 rfs4_get_lo_state(stateid4 *stateid, rfs4_lo_state_t **lspp, bool_t lock_fp)
3423 {
3424 stateid_t *id = (stateid_t *)stateid;
3425 rfs4_lo_state_t *lsp;
3426
3427 *lspp = NULL;
3428
3429 /* If we are booted as a cluster node, was stateid locally generated? */
3430 if ((cluster_bootflags & CLUSTER_BOOTED) && foreign_stateid(id))
3431 return (NFS4ERR_STALE_STATEID);
3432
3433 lsp = rfs4_findlo_state(id, lock_fp);
3434 if (lsp == NULL) {
3435 return (what_stateid_error(id, LOCKID));
3436 }
3437
3438 if (rfs4_lease_expired(lsp->rls_state->rs_owner->ro_client)) {
3439 rfs4_lo_state_rele(lsp, lock_fp);
3440 return (NFS4ERR_EXPIRED);
3441 }
3442
3443 *lspp = lsp;
3444
3445 return (NFS4_OK);
3446 }
3447
3448 static nfsstat4
rfs4_get_all_state(stateid4 * sid,rfs4_state_t ** spp,rfs4_deleg_state_t ** dspp,rfs4_lo_state_t ** lspp)3449 rfs4_get_all_state(stateid4 *sid, rfs4_state_t **spp,
3450 rfs4_deleg_state_t **dspp, rfs4_lo_state_t **lspp)
3451 {
3452 rfs4_state_t *sp = NULL;
3453 rfs4_deleg_state_t *dsp = NULL;
3454 rfs4_lo_state_t *lsp = NULL;
3455 stateid_t *id;
3456 nfsstat4 status;
3457
3458 *spp = NULL; *dspp = NULL; *lspp = NULL;
3459
3460 id = (stateid_t *)sid;
3461 switch (id->bits.type) {
3462 case OPENID:
3463 status = rfs4_get_state_lockit(sid, &sp, FALSE, FALSE);
3464 break;
3465 case DELEGID:
3466 status = rfs4_get_deleg_state(sid, &dsp);
3467 break;
3468 case LOCKID:
3469 status = rfs4_get_lo_state(sid, &lsp, FALSE);
3470 if (status == NFS4_OK) {
3471 sp = lsp->rls_state;
3472 rfs4_dbe_hold(sp->rs_dbe);
3473 }
3474 break;
3475 default:
3476 status = NFS4ERR_BAD_STATEID;
3477 }
3478
3479 if (status == NFS4_OK) {
3480 *spp = sp;
3481 *dspp = dsp;
3482 *lspp = lsp;
3483 }
3484
3485 return (status);
3486 }
3487
3488 /*
3489 * Given the I/O mode (FREAD or FWRITE), this checks whether the
3490 * rfs4_state_t struct has access to do this operation and if so
3491 * return NFS4_OK; otherwise the proper NFSv4 error is returned.
3492 */
3493 nfsstat4
rfs4_state_has_access(rfs4_state_t * sp,int mode,vnode_t * vp)3494 rfs4_state_has_access(rfs4_state_t *sp, int mode, vnode_t *vp)
3495 {
3496 nfsstat4 stat = NFS4_OK;
3497 rfs4_file_t *fp;
3498 bool_t create = FALSE;
3499
3500 rfs4_dbe_lock(sp->rs_dbe);
3501 if (mode == FWRITE) {
3502 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_WRITE)) {
3503 stat = NFS4ERR_OPENMODE;
3504 }
3505 } else if (mode == FREAD) {
3506 if (!(sp->rs_share_access & OPEN4_SHARE_ACCESS_READ)) {
3507 /*
3508 * If we have OPENed the file with DENYing access
3509 * to both READ and WRITE then no one else could
3510 * have OPENed the file, hence no conflicting READ
3511 * deny. This check is merely an optimization.
3512 */
3513 if (sp->rs_share_deny == OPEN4_SHARE_DENY_BOTH)
3514 goto out;
3515
3516 /* Check against file struct's DENY mode */
3517 fp = rfs4_findfile(vp, NULL, &create);
3518 if (fp != NULL) {
3519 int deny_read = 0;
3520 rfs4_dbe_lock(fp->rf_dbe);
3521 /*
3522 * Check if any other open owner has the file
3523 * OPENed with deny READ.
3524 */
3525 if (sp->rs_share_deny & OPEN4_SHARE_DENY_READ)
3526 deny_read = 1;
3527 ASSERT(fp->rf_deny_read >= deny_read);
3528 if (fp->rf_deny_read > deny_read)
3529 stat = NFS4ERR_OPENMODE;
3530 rfs4_dbe_unlock(fp->rf_dbe);
3531 rfs4_file_rele(fp);
3532 }
3533 }
3534 } else {
3535 /* Illegal I/O mode */
3536 stat = NFS4ERR_INVAL;
3537 }
3538 out:
3539 rfs4_dbe_unlock(sp->rs_dbe);
3540 return (stat);
3541 }
3542
3543 /*
3544 * Given the I/O mode (FREAD or FWRITE), the vnode, the stateid and whether
3545 * the file is being truncated, return NFS4_OK if allowed or appropriate
3546 * V4 error if not. Note NFS4ERR_DELAY will be returned and a recall on
3547 * the associated file will be done if the I/O is not consistent with any
3548 * delegation in effect on the file. Should be holding VOP_RWLOCK, either
3549 * as reader or writer as appropriate. rfs4_op_open will acquire the
3550 * VOP_RWLOCK as writer when setting up delegation. If the stateid is bad
3551 * this routine will return NFS4ERR_BAD_STATEID. In addition, through the
3552 * deleg parameter, we will return whether a write delegation is held by
3553 * the client associated with this stateid.
3554 * If the server instance associated with the relevant client is in its
3555 * grace period, return NFS4ERR_GRACE.
3556 */
3557
3558 nfsstat4
rfs4_check_stateid(int mode,vnode_t * vp,stateid4 * stateid,bool_t trunc,bool_t * deleg,bool_t do_access,caller_context_t * ct)3559 rfs4_check_stateid(int mode, vnode_t *vp,
3560 stateid4 *stateid, bool_t trunc, bool_t *deleg,
3561 bool_t do_access, caller_context_t *ct)
3562 {
3563 rfs4_file_t *fp;
3564 bool_t create = FALSE;
3565 rfs4_state_t *sp;
3566 rfs4_deleg_state_t *dsp;
3567 rfs4_lo_state_t *lsp;
3568 stateid_t *id = (stateid_t *)stateid;
3569 nfsstat4 stat = NFS4_OK;
3570
3571 if (ct != NULL) {
3572 ct->cc_sysid = 0;
3573 ct->cc_pid = 0;
3574 ct->cc_caller_id = nfs4_srv_caller_id;
3575 ct->cc_flags = CC_DONTBLOCK;
3576 }
3577
3578 if (ISSPECIAL(stateid)) {
3579 fp = rfs4_findfile(vp, NULL, &create);
3580 if (fp == NULL)
3581 return (NFS4_OK);
3582 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_NONE) {
3583 rfs4_file_rele(fp);
3584 return (NFS4_OK);
3585 }
3586 if (mode == FWRITE ||
3587 fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_WRITE) {
3588 rfs4_recall_deleg(fp, trunc, NULL);
3589 rfs4_file_rele(fp);
3590 return (NFS4ERR_DELAY);
3591 }
3592 rfs4_file_rele(fp);
3593 return (NFS4_OK);
3594 } else {
3595 stat = rfs4_get_all_state(stateid, &sp, &dsp, &lsp);
3596 if (stat != NFS4_OK)
3597 return (stat);
3598 if (lsp != NULL) {
3599 /* Is associated server instance in its grace period? */
3600 if (rfs4_clnt_in_grace(lsp->rls_locker->rl_client)) {
3601 rfs4_lo_state_rele(lsp, FALSE);
3602 if (sp != NULL)
3603 rfs4_state_rele_nounlock(sp);
3604 return (NFS4ERR_GRACE);
3605 }
3606 if (id->bits.type == LOCKID) {
3607 /* Seqid in the future? - that's bad */
3608 if (lsp->rls_lockid.bits.chgseq <
3609 id->bits.chgseq) {
3610 rfs4_lo_state_rele(lsp, FALSE);
3611 if (sp != NULL)
3612 rfs4_state_rele_nounlock(sp);
3613 return (NFS4ERR_BAD_STATEID);
3614 }
3615 /* Seqid in the past? - that's old */
3616 if (lsp->rls_lockid.bits.chgseq >
3617 id->bits.chgseq) {
3618 rfs4_lo_state_rele(lsp, FALSE);
3619 if (sp != NULL)
3620 rfs4_state_rele_nounlock(sp);
3621 return (NFS4ERR_OLD_STATEID);
3622 }
3623 /* Ensure specified filehandle matches */
3624 if (lsp->rls_state->rs_finfo->rf_vp != vp) {
3625 rfs4_lo_state_rele(lsp, FALSE);
3626 if (sp != NULL)
3627 rfs4_state_rele_nounlock(sp);
3628 return (NFS4ERR_BAD_STATEID);
3629 }
3630 }
3631 if (ct != NULL) {
3632 ct->cc_sysid =
3633 lsp->rls_locker->rl_client->rc_sysidt;
3634 ct->cc_pid = lsp->rls_locker->rl_pid;
3635 }
3636 rfs4_lo_state_rele(lsp, FALSE);
3637 }
3638
3639 /* Stateid provided was an "open" stateid */
3640 if (sp != NULL) {
3641 /* Is associated server instance in its grace period? */
3642 if (rfs4_clnt_in_grace(sp->rs_owner->ro_client)) {
3643 rfs4_state_rele_nounlock(sp);
3644 return (NFS4ERR_GRACE);
3645 }
3646 if (id->bits.type == OPENID) {
3647 /* Seqid in the future? - that's bad */
3648 if (sp->rs_stateid.bits.chgseq <
3649 id->bits.chgseq) {
3650 rfs4_state_rele_nounlock(sp);
3651 return (NFS4ERR_BAD_STATEID);
3652 }
3653 /* Seqid in the past - that's old */
3654 if (sp->rs_stateid.bits.chgseq >
3655 id->bits.chgseq) {
3656 rfs4_state_rele_nounlock(sp);
3657 return (NFS4ERR_OLD_STATEID);
3658 }
3659 }
3660 /* Ensure specified filehandle matches */
3661 if (sp->rs_finfo->rf_vp != vp) {
3662 rfs4_state_rele_nounlock(sp);
3663 return (NFS4ERR_BAD_STATEID);
3664 }
3665
3666 if (sp->rs_owner->ro_need_confirm) {
3667 rfs4_state_rele_nounlock(sp);
3668 return (NFS4ERR_BAD_STATEID);
3669 }
3670
3671 if (sp->rs_closed == TRUE) {
3672 rfs4_state_rele_nounlock(sp);
3673 return (NFS4ERR_OLD_STATEID);
3674 }
3675
3676 if (do_access)
3677 stat = rfs4_state_has_access(sp, mode, vp);
3678 else
3679 stat = NFS4_OK;
3680
3681 /*
3682 * Return whether this state has write
3683 * delegation if desired
3684 */
3685 if (deleg && (sp->rs_finfo->rf_dinfo.rd_dtype ==
3686 OPEN_DELEGATE_WRITE))
3687 *deleg = TRUE;
3688
3689 /*
3690 * We got a valid stateid, so we update the
3691 * lease on the client. Ideally we would like
3692 * to do this after the calling op succeeds,
3693 * but for now this will be good
3694 * enough. Callers of this routine are
3695 * currently insulated from the state stuff.
3696 */
3697 rfs4_update_lease(sp->rs_owner->ro_client);
3698
3699 /*
3700 * If a delegation is present on this file and
3701 * this is a WRITE, then update the lastwrite
3702 * time to indicate that activity is present.
3703 */
3704 if (sp->rs_finfo->rf_dinfo.rd_dtype ==
3705 OPEN_DELEGATE_WRITE &&
3706 mode == FWRITE) {
3707 sp->rs_finfo->rf_dinfo.rd_time_lastwrite =
3708 gethrestime_sec();
3709 }
3710
3711 rfs4_state_rele_nounlock(sp);
3712
3713 return (stat);
3714 }
3715
3716 if (dsp != NULL) {
3717 /* Is associated server instance in its grace period? */
3718 if (rfs4_clnt_in_grace(dsp->rds_client)) {
3719 rfs4_deleg_state_rele(dsp);
3720 return (NFS4ERR_GRACE);
3721 }
3722 if (dsp->rds_delegid.bits.chgseq != id->bits.chgseq) {
3723 rfs4_deleg_state_rele(dsp);
3724 return (NFS4ERR_BAD_STATEID);
3725 }
3726
3727 /* Ensure specified filehandle matches */
3728 if (dsp->rds_finfo->rf_vp != vp) {
3729 rfs4_deleg_state_rele(dsp);
3730 return (NFS4ERR_BAD_STATEID);
3731 }
3732 /*
3733 * Return whether this state has write
3734 * delegation if desired
3735 */
3736 if (deleg && (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3737 OPEN_DELEGATE_WRITE))
3738 *deleg = TRUE;
3739
3740 rfs4_update_lease(dsp->rds_client);
3741
3742 /*
3743 * If a delegation is present on this file and
3744 * this is a WRITE, then update the lastwrite
3745 * time to indicate that activity is present.
3746 */
3747 if (dsp->rds_finfo->rf_dinfo.rd_dtype ==
3748 OPEN_DELEGATE_WRITE && mode == FWRITE) {
3749 dsp->rds_finfo->rf_dinfo.rd_time_lastwrite =
3750 gethrestime_sec();
3751 }
3752
3753 /*
3754 * XXX - what happens if this is a WRITE and the
3755 * delegation type of for READ.
3756 */
3757 rfs4_deleg_state_rele(dsp);
3758
3759 return (stat);
3760 }
3761 /*
3762 * If we got this far, something bad happened
3763 */
3764 return (NFS4ERR_BAD_STATEID);
3765 }
3766 }
3767
3768
3769 /*
3770 * This is a special function in that for the file struct provided the
3771 * server wants to remove/close all current state associated with the
3772 * file. The prime use of this would be with OP_REMOVE to force the
3773 * release of state and particularly of file locks.
3774 *
3775 * There is an assumption that there is no delegations outstanding on
3776 * this file at this point. The caller should have waited for those
3777 * to be returned or revoked.
3778 */
3779 void
rfs4_close_all_state(rfs4_file_t * fp)3780 rfs4_close_all_state(rfs4_file_t *fp)
3781 {
3782 rfs4_state_t *sp;
3783
3784 rfs4_dbe_lock(fp->rf_dbe);
3785
3786 #ifdef DEBUG
3787 /* only applies when server is handing out delegations */
3788 if (rfs4_deleg_policy != SRV_NEVER_DELEGATE)
3789 ASSERT(fp->rf_dinfo.rd_hold_grant > 0);
3790 #endif
3791
3792 /* No delegations for this file */
3793 ASSERT(list_is_empty(&fp->rf_delegstatelist));
3794
3795 /* Make sure that it can not be found */
3796 rfs4_dbe_invalidate(fp->rf_dbe);
3797
3798 if (fp->rf_vp == NULL) {
3799 rfs4_dbe_unlock(fp->rf_dbe);
3800 return;
3801 }
3802 rfs4_dbe_unlock(fp->rf_dbe);
3803
3804 /*
3805 * Hold as writer to prevent other server threads from
3806 * processing requests related to the file while all state is
3807 * being removed.
3808 */
3809 rw_enter(&fp->rf_file_rwlock, RW_WRITER);
3810
3811 /* Remove ALL state from the file */
3812 while (sp = rfs4_findstate_by_file(fp)) {
3813 rfs4_state_close(sp, FALSE, FALSE, CRED());
3814 rfs4_state_rele_nounlock(sp);
3815 }
3816
3817 /*
3818 * This is only safe since there are no further references to
3819 * the file.
3820 */
3821 rfs4_dbe_lock(fp->rf_dbe);
3822 if (fp->rf_vp) {
3823 vnode_t *vp = fp->rf_vp;
3824
3825 mutex_enter(&vp->v_vsd_lock);
3826 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3827 mutex_exit(&vp->v_vsd_lock);
3828 VN_RELE(vp);
3829 fp->rf_vp = NULL;
3830 }
3831 rfs4_dbe_unlock(fp->rf_dbe);
3832
3833 /* Finally let other references to proceed */
3834 rw_exit(&fp->rf_file_rwlock);
3835 }
3836
3837 /*
3838 * This function is used as a target for the rfs4_dbe_walk() call
3839 * below. The purpose of this function is to see if the
3840 * lockowner_state refers to a file that resides within the exportinfo
3841 * export. If so, then remove the lock_owner state (file locks and
3842 * share "locks") for this object since the intent is the server is
3843 * unexporting the specified directory. Be sure to invalidate the
3844 * object after the state has been released
3845 */
3846 static void
rfs4_lo_state_walk_callout(rfs4_entry_t u_entry,void * e)3847 rfs4_lo_state_walk_callout(rfs4_entry_t u_entry, void *e)
3848 {
3849 rfs4_lo_state_t *lsp = (rfs4_lo_state_t *)u_entry;
3850 struct exportinfo *exi = (struct exportinfo *)e;
3851 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3852 fhandle_t *efhp;
3853
3854 efhp = (fhandle_t *)&exi->exi_fh;
3855 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3856
3857 FH_TO_FMT4(efhp, exi_fhp);
3858
3859 finfo_fhp = (nfs_fh4_fmt_t *)lsp->rls_state->rs_finfo->
3860 rf_filehandle.nfs_fh4_val;
3861
3862 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3863 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3864 exi_fhp->fh4_xlen) == 0) {
3865 rfs4_state_close(lsp->rls_state, FALSE, FALSE, CRED());
3866 rfs4_dbe_invalidate(lsp->rls_dbe);
3867 rfs4_dbe_invalidate(lsp->rls_state->rs_dbe);
3868 }
3869 }
3870
3871 /*
3872 * This function is used as a target for the rfs4_dbe_walk() call
3873 * below. The purpose of this function is to see if the state refers
3874 * to a file that resides within the exportinfo export. If so, then
3875 * remove the open state for this object since the intent is the
3876 * server is unexporting the specified directory. The main result for
3877 * this type of entry is to invalidate it such it will not be found in
3878 * the future.
3879 */
3880 static void
rfs4_state_walk_callout(rfs4_entry_t u_entry,void * e)3881 rfs4_state_walk_callout(rfs4_entry_t u_entry, void *e)
3882 {
3883 rfs4_state_t *sp = (rfs4_state_t *)u_entry;
3884 struct exportinfo *exi = (struct exportinfo *)e;
3885 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3886 fhandle_t *efhp;
3887
3888 efhp = (fhandle_t *)&exi->exi_fh;
3889 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3890
3891 FH_TO_FMT4(efhp, exi_fhp);
3892
3893 finfo_fhp =
3894 (nfs_fh4_fmt_t *)sp->rs_finfo->rf_filehandle.nfs_fh4_val;
3895
3896 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3897 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3898 exi_fhp->fh4_xlen) == 0) {
3899 rfs4_state_close(sp, TRUE, FALSE, CRED());
3900 rfs4_dbe_invalidate(sp->rs_dbe);
3901 }
3902 }
3903
3904 /*
3905 * This function is used as a target for the rfs4_dbe_walk() call
3906 * below. The purpose of this function is to see if the state refers
3907 * to a file that resides within the exportinfo export. If so, then
3908 * remove the deleg state for this object since the intent is the
3909 * server is unexporting the specified directory. The main result for
3910 * this type of entry is to invalidate it such it will not be found in
3911 * the future.
3912 */
3913 static void
rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry,void * e)3914 rfs4_deleg_state_walk_callout(rfs4_entry_t u_entry, void *e)
3915 {
3916 rfs4_deleg_state_t *dsp = (rfs4_deleg_state_t *)u_entry;
3917 struct exportinfo *exi = (struct exportinfo *)e;
3918 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3919 fhandle_t *efhp;
3920
3921 efhp = (fhandle_t *)&exi->exi_fh;
3922 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3923
3924 FH_TO_FMT4(efhp, exi_fhp);
3925
3926 finfo_fhp =
3927 (nfs_fh4_fmt_t *)dsp->rds_finfo->rf_filehandle.nfs_fh4_val;
3928
3929 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3930 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3931 exi_fhp->fh4_xlen) == 0) {
3932 rfs4_dbe_invalidate(dsp->rds_dbe);
3933 }
3934 }
3935
3936 /*
3937 * This function is used as a target for the rfs4_dbe_walk() call
3938 * below. The purpose of this function is to see if the state refers
3939 * to a file that resides within the exportinfo export. If so, then
3940 * release vnode hold for this object since the intent is the server
3941 * is unexporting the specified directory. Invalidation will prevent
3942 * this struct from being found in the future.
3943 */
3944 static void
rfs4_file_walk_callout(rfs4_entry_t u_entry,void * e)3945 rfs4_file_walk_callout(rfs4_entry_t u_entry, void *e)
3946 {
3947 rfs4_file_t *fp = (rfs4_file_t *)u_entry;
3948 struct exportinfo *exi = (struct exportinfo *)e;
3949 nfs_fh4_fmt_t fhfmt4, *exi_fhp, *finfo_fhp;
3950 fhandle_t *efhp;
3951
3952 efhp = (fhandle_t *)&exi->exi_fh;
3953 exi_fhp = (nfs_fh4_fmt_t *)&fhfmt4;
3954
3955 FH_TO_FMT4(efhp, exi_fhp);
3956
3957 finfo_fhp = (nfs_fh4_fmt_t *)fp->rf_filehandle.nfs_fh4_val;
3958
3959 if (EQFSID(&finfo_fhp->fh4_fsid, &exi_fhp->fh4_fsid) &&
3960 bcmp(&finfo_fhp->fh4_xdata, &exi_fhp->fh4_xdata,
3961 exi_fhp->fh4_xlen) == 0) {
3962 if (fp->rf_vp) {
3963 vnode_t *vp = fp->rf_vp;
3964
3965 /*
3966 * don't leak monitors and remove the reference
3967 * put on the vnode when the delegation was granted.
3968 */
3969 if (fp->rf_dinfo.rd_dtype == OPEN_DELEGATE_READ) {
3970 (void) fem_uninstall(vp, deleg_rdops,
3971 (void *)fp);
3972 vn_open_downgrade(vp, FREAD);
3973 } else if (fp->rf_dinfo.rd_dtype ==
3974 OPEN_DELEGATE_WRITE) {
3975 (void) fem_uninstall(vp, deleg_wrops,
3976 (void *)fp);
3977 vn_open_downgrade(vp, FREAD|FWRITE);
3978 }
3979 mutex_enter(&vp->v_vsd_lock);
3980 (void) vsd_set(vp, nfs4_srv_vkey, NULL);
3981 mutex_exit(&vp->v_vsd_lock);
3982 VN_RELE(vp);
3983 fp->rf_vp = NULL;
3984 }
3985 rfs4_dbe_invalidate(fp->rf_dbe);
3986 }
3987 }
3988
3989 /*
3990 * Given a directory that is being unexported, cleanup/release all
3991 * state in the server that refers to objects residing underneath this
3992 * particular export. The ordering of the release is important.
3993 * Lock_owner, then state and then file.
3994 */
3995 void
rfs4_clean_state_exi(struct exportinfo * exi)3996 rfs4_clean_state_exi(struct exportinfo *exi)
3997 {
3998 mutex_enter(&rfs4_state_lock);
3999
4000 if (rfs4_server_state == NULL) {
4001 mutex_exit(&rfs4_state_lock);
4002 return;
4003 }
4004
4005 rfs4_dbe_walk(rfs4_lo_state_tab, rfs4_lo_state_walk_callout, exi);
4006 rfs4_dbe_walk(rfs4_state_tab, rfs4_state_walk_callout, exi);
4007 rfs4_dbe_walk(rfs4_deleg_state_tab, rfs4_deleg_state_walk_callout, exi);
4008 rfs4_dbe_walk(rfs4_file_tab, rfs4_file_walk_callout, exi);
4009
4010 mutex_exit(&rfs4_state_lock);
4011 }
4012