1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 /*
27 * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
28 * All Rights Reserved
29 */
30
31 /*
32 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
33 * Copyright (c) 2017 by Delphix. All rights reserved.
34 */
35
36 #include <sys/param.h>
37 #include <sys/types.h>
38 #include <sys/systm.h>
39 #include <sys/cred.h>
40 #include <sys/proc.h>
41 #include <sys/user.h>
42 #include <sys/time.h>
43 #include <sys/buf.h>
44 #include <sys/vfs.h>
45 #include <sys/vnode.h>
46 #include <sys/socket.h>
47 #include <sys/uio.h>
48 #include <sys/tiuser.h>
49 #include <sys/swap.h>
50 #include <sys/errno.h>
51 #include <sys/debug.h>
52 #include <sys/kmem.h>
53 #include <sys/kstat.h>
54 #include <sys/cmn_err.h>
55 #include <sys/vtrace.h>
56 #include <sys/session.h>
57 #include <sys/dnlc.h>
58 #include <sys/bitmap.h>
59 #include <sys/acl.h>
60 #include <sys/ddi.h>
61 #include <sys/pathname.h>
62 #include <sys/flock.h>
63 #include <sys/dirent.h>
64 #include <sys/flock.h>
65 #include <sys/callb.h>
66 #include <sys/sdt.h>
67
68 #include <vm/pvn.h>
69
70 #include <rpc/types.h>
71 #include <rpc/xdr.h>
72 #include <rpc/auth.h>
73 #include <rpc/rpcsec_gss.h>
74 #include <rpc/clnt.h>
75
76 #include <nfs/nfs.h>
77 #include <nfs/nfs_clnt.h>
78 #include <nfs/nfs_acl.h>
79
80 #include <nfs/nfs4.h>
81 #include <nfs/rnode4.h>
82 #include <nfs/nfs4_clnt.h>
83
84 /*
85 * The hash queues for the access to active and cached rnodes
86 * are organized as doubly linked lists. A reader/writer lock
87 * for each hash bucket is used to control access and to synchronize
88 * lookups, additions, and deletions from the hash queue.
89 *
90 * The rnode freelist is organized as a doubly linked list with
91 * a head pointer. Additions and deletions are synchronized via
92 * a single mutex.
93 *
94 * In order to add an rnode to the free list, it must be hashed into
95 * a hash queue and the exclusive lock to the hash queue be held.
96 * If an rnode is not hashed into a hash queue, then it is destroyed
97 * because it represents no valuable information that can be reused
98 * about the file. The exclusive lock to the hash queue must be
99 * held in order to prevent a lookup in the hash queue from finding
100 * the rnode and using it and assuming that the rnode is not on the
101 * freelist. The lookup in the hash queue will have the hash queue
102 * locked, either exclusive or shared.
103 *
104 * The vnode reference count for each rnode is not allowed to drop
105 * below 1. This prevents external entities, such as the VM
106 * subsystem, from acquiring references to vnodes already on the
107 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
109 * rnode is looked up in the hash queues, then either the rnode
110 * is removed from the freelist and that reference is transferred to
111 * the new reference or the vnode reference count must be incremented
112 * accordingly. The mutex for the freelist must be held in order to
113 * accurately test to see if the rnode is on the freelist or not.
114 * The hash queue lock might be held shared and it is possible that
115 * two different threads may race to remove the rnode from the
116 * freelist. This race can be resolved by holding the mutex for the
117 * freelist. Please note that the mutex for the freelist does not
118 * need to be held if the rnode is not on the freelist. It can not be
119 * placed on the freelist due to the requirement that the thread
120 * putting the rnode on the freelist must hold the exclusive lock
121 * to the hash queue and the thread doing the lookup in the hash
122 * queue is holding either a shared or exclusive lock to the hash
123 * queue.
124 *
125 * The lock ordering is:
126 *
127 * hash bucket lock -> vnode lock
128 * hash bucket lock -> freelist lock -> r_statelock
129 */
130 r4hashq_t *rtable4;
131
132 static kmutex_t rp4freelist_lock;
133 static rnode4_t *rp4freelist = NULL;
134 static long rnode4_new = 0;
135 int rtable4size;
136 static int rtable4mask;
137 static struct kmem_cache *rnode4_cache;
138 static int rnode4_hashlen = 4;
139
140 static void r4inactive(rnode4_t *, cred_t *);
141 static vnode_t *make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
142 struct vnodeops *,
143 int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
144 cred_t *),
145 int *, cred_t *);
146 static void rp4_rmfree(rnode4_t *);
147 int nfs4_free_data_reclaim(rnode4_t *);
148 static int nfs4_active_data_reclaim(rnode4_t *);
149 static int nfs4_free_reclaim(void);
150 static int nfs4_active_reclaim(void);
151 static int nfs4_rnode_reclaim(void);
152 static void nfs4_reclaim(void *);
153 static int isrootfh(nfs4_sharedfh_t *, rnode4_t *);
154 static void uninit_rnode4(rnode4_t *);
155 static void destroy_rnode4(rnode4_t *);
156 static void r4_stub_set(rnode4_t *, nfs4_stub_type_t);
157
158 #ifdef DEBUG
159 static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
160 static int nfs4_rnode_debug = 0;
161 /* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
162 static int nfs4_rnode_nofreelist = 0;
163 /* give messages on colliding shared filehandles */
164 static void r4_dup_check(rnode4_t *, vfs_t *);
165 #endif
166
167 /*
168 * If the vnode has pages, run the list and check for any that are
169 * still dangling. We call this routine before putting an rnode on
170 * the free list.
171 */
172 static int
nfs4_dross_pages(vnode_t * vp)173 nfs4_dross_pages(vnode_t *vp)
174 {
175 page_t *pp;
176 kmutex_t *vphm;
177
178 vphm = page_vnode_mutex(vp);
179 mutex_enter(vphm);
180 if ((pp = vp->v_pages) != NULL) {
181 do {
182 if (pp->p_hash != PVN_VPLIST_HASH_TAG &&
183 pp->p_fsdata != C_NOCOMMIT) {
184 mutex_exit(vphm);
185 return (1);
186 }
187 } while ((pp = pp->p_vpnext) != vp->v_pages);
188 }
189 mutex_exit(vphm);
190
191 return (0);
192 }
193
194 /*
195 * Flush any pages left on this rnode.
196 */
197 static void
r4flushpages(rnode4_t * rp,cred_t * cr)198 r4flushpages(rnode4_t *rp, cred_t *cr)
199 {
200 vnode_t *vp;
201 int error;
202
203 /*
204 * Before freeing anything, wait until all asynchronous
205 * activity is done on this rnode. This will allow all
206 * asynchronous read ahead and write behind i/o's to
207 * finish.
208 */
209 mutex_enter(&rp->r_statelock);
210 while (rp->r_count > 0)
211 cv_wait(&rp->r_cv, &rp->r_statelock);
212 mutex_exit(&rp->r_statelock);
213
214 /*
215 * Flush and invalidate all pages associated with the vnode.
216 */
217 vp = RTOV4(rp);
218 if (nfs4_has_pages(vp)) {
219 ASSERT(vp->v_type != VCHR);
220 if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
221 error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
222 if (error && (error == ENOSPC || error == EDQUOT)) {
223 mutex_enter(&rp->r_statelock);
224 if (!rp->r_error)
225 rp->r_error = error;
226 mutex_exit(&rp->r_statelock);
227 }
228 }
229 nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
230 }
231 }
232
233 /*
234 * Free the resources associated with an rnode.
235 */
236 static void
r4inactive(rnode4_t * rp,cred_t * cr)237 r4inactive(rnode4_t *rp, cred_t *cr)
238 {
239 vnode_t *vp;
240 char *contents;
241 int size;
242 vsecattr_t *vsp;
243 vnode_t *xattr;
244
245 r4flushpages(rp, cr);
246
247 vp = RTOV4(rp);
248
249 /*
250 * Free any held caches which may be
251 * associated with this rnode.
252 */
253 mutex_enter(&rp->r_statelock);
254 contents = rp->r_symlink.contents;
255 size = rp->r_symlink.size;
256 rp->r_symlink.contents = NULL;
257 vsp = rp->r_secattr;
258 rp->r_secattr = NULL;
259 xattr = rp->r_xattr_dir;
260 rp->r_xattr_dir = NULL;
261 mutex_exit(&rp->r_statelock);
262
263 /*
264 * Free the access cache entries.
265 */
266 (void) nfs4_access_purge_rp(rp);
267
268 /*
269 * Free the readdir cache entries.
270 */
271 nfs4_purge_rddir_cache(vp);
272
273 /*
274 * Free the symbolic link cache.
275 */
276 if (contents != NULL) {
277
278 kmem_free((void *)contents, size);
279 }
280
281 /*
282 * Free any cached ACL.
283 */
284 if (vsp != NULL)
285 nfs4_acl_free_cache(vsp);
286
287 /*
288 * Release the cached xattr_dir
289 */
290 if (xattr != NULL)
291 VN_RELE(xattr);
292 }
293
294 /*
295 * We have seen a case that the fh passed in is for "." which
296 * should be a VROOT node, however, the fh is different from the
297 * root fh stored in the mntinfo4_t. The invalid fh might be
298 * from a misbehaved server and will panic the client system at
299 * a later time. To avoid the panic, we drop the bad fh, use
300 * the root fh from mntinfo4_t, and print an error message
301 * for attention.
302 */
303 nfs4_sharedfh_t *
badrootfh_check(nfs4_sharedfh_t * fh,nfs4_fname_t * nm,mntinfo4_t * mi,int * wasbad)304 badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
305 int *wasbad)
306 {
307 char *s;
308
309 *wasbad = 0;
310 s = fn_name(nm);
311 ASSERT(strcmp(s, "..") != 0);
312
313 if ((s[0] == '.' && s[1] == '\0') && fh &&
314 !SFH4_SAME(mi->mi_rootfh, fh)) {
315 #ifdef DEBUG
316 nfs4_fhandle_t fhandle;
317
318 zcmn_err(mi->mi_zone->zone_id, CE_WARN,
319 "Server %s returns a different "
320 "root filehandle for the path %s:",
321 mi->mi_curr_serv->sv_hostname,
322 mi->mi_curr_serv->sv_path);
323
324 /* print the bad fh */
325 fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
326 bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
327 fhandle.fh_len);
328 nfs4_printfhandle(&fhandle);
329
330 /* print mi_rootfh */
331 fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
332 bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
333 fhandle.fh_len);
334 nfs4_printfhandle(&fhandle);
335 #endif
336 /* use mi_rootfh instead; fh will be rele by the caller */
337 fh = mi->mi_rootfh;
338 *wasbad = 1;
339 }
340
341 kmem_free(s, MAXNAMELEN);
342 return (fh);
343 }
344
345 void
r4_do_attrcache(vnode_t * vp,nfs4_ga_res_t * garp,int newnode,hrtime_t t,cred_t * cr,int index)346 r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
347 hrtime_t t, cred_t *cr, int index)
348 {
349 int is_stub;
350 vattr_t *attr;
351 /*
352 * Don't add to attrcache if time overflow, but
353 * no need to check because either attr is null or the time
354 * values in it were processed by nfs4_time_ntov(), which checks
355 * for time overflows.
356 */
357 attr = garp ? &garp->n4g_va : NULL;
358
359 if (attr) {
360 if (!newnode) {
361 rw_exit(&rtable4[index].r_lock);
362 #ifdef DEBUG
363 if (vp->v_type != attr->va_type &&
364 vp->v_type != VNON && attr->va_type != VNON) {
365 zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
366 "makenfs4node: type (%d) doesn't "
367 "match type of found node at %p (%d)",
368 attr->va_type, (void *)vp, vp->v_type);
369 }
370 #endif
371 nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
372 } else {
373 rnode4_t *rp = VTOR4(vp);
374
375 vp->v_type = attr->va_type;
376 vp->v_rdev = attr->va_rdev;
377
378 /*
379 * Turn this object into a "stub" object if we
380 * crossed an underlying server fs boundary.
381 * To make this check, during mount we save the
382 * fsid of the server object being mounted.
383 * Here we compare this object's server fsid
384 * with the fsid we saved at mount. If they
385 * are different, we crossed server fs boundary.
386 *
387 * The stub type is set (or not) at rnode
388 * creation time and it never changes for life
389 * of the rnode.
390 *
391 * This stub will be for a mirror-mount, rather than
392 * a referral (the latter also sets R4SRVSTUB).
393 *
394 * The stub type is also set during RO failover,
395 * nfs4_remap_file().
396 *
397 * We don't bother with taking r_state_lock to
398 * set the stub type because this is a new rnode
399 * and we're holding the hash bucket r_lock RW_WRITER.
400 * No other thread could have obtained access
401 * to this rnode.
402 */
403 is_stub = 0;
404 if (garp->n4g_fsid_valid) {
405 fattr4_fsid ga_fsid = garp->n4g_fsid;
406 servinfo4_t *svp = rp->r_server;
407
408 rp->r_srv_fsid = ga_fsid;
409
410 (void) nfs_rw_enter_sig(&svp->sv_lock,
411 RW_READER, 0);
412 if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
413 is_stub = 1;
414 nfs_rw_exit(&svp->sv_lock);
415 }
416
417 if (is_stub)
418 r4_stub_mirrormount(rp);
419 else
420 r4_stub_none(rp);
421
422 /* Can not cache partial attr */
423 if (attr->va_mask == AT_ALL)
424 nfs4_attrcache_noinval(vp, garp, t);
425 else
426 PURGE_ATTRCACHE4(vp);
427
428 rw_exit(&rtable4[index].r_lock);
429 }
430 } else {
431 if (newnode) {
432 PURGE_ATTRCACHE4(vp);
433 }
434 rw_exit(&rtable4[index].r_lock);
435 }
436 }
437
438 /*
439 * Find or create an rnode based primarily on filehandle. To be
440 * used when dvp (vnode for parent directory) is not available;
441 * otherwise, makenfs4node() should be used.
442 *
443 * The nfs4_fname_t argument *npp is consumed and nulled out.
444 */
445
446 vnode_t *
makenfs4node_by_fh(nfs4_sharedfh_t * sfh,nfs4_sharedfh_t * psfh,nfs4_fname_t ** npp,nfs4_ga_res_t * garp,mntinfo4_t * mi,cred_t * cr,hrtime_t t)447 makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
448 nfs4_fname_t **npp, nfs4_ga_res_t *garp,
449 mntinfo4_t *mi, cred_t *cr, hrtime_t t)
450 {
451 vfs_t *vfsp = mi->mi_vfsp;
452 int newnode = 0;
453 vnode_t *vp;
454 rnode4_t *rp;
455 svnode_t *svp;
456 nfs4_fname_t *name, *svpname;
457 int index;
458
459 ASSERT(npp && *npp);
460 name = *npp;
461 *npp = NULL;
462
463 index = rtable4hash(sfh);
464 rw_enter(&rtable4[index].r_lock, RW_READER);
465
466 vp = make_rnode4(sfh, &rtable4[index], vfsp,
467 nfs4_vnodeops, nfs4_putapage, &newnode, cr);
468
469 svp = VTOSV(vp);
470 rp = VTOR4(vp);
471 if (newnode) {
472 svp->sv_forw = svp->sv_back = svp;
473 svp->sv_name = name;
474 if (psfh != NULL)
475 sfh4_hold(psfh);
476 svp->sv_dfh = psfh;
477 } else {
478 /*
479 * It is possible that due to a server
480 * side rename fnames have changed.
481 * update the fname here.
482 */
483 mutex_enter(&rp->r_svlock);
484 svpname = svp->sv_name;
485 if (svp->sv_name != name) {
486 svp->sv_name = name;
487 mutex_exit(&rp->r_svlock);
488 fn_rele(&svpname);
489 } else {
490 mutex_exit(&rp->r_svlock);
491 fn_rele(&name);
492 }
493 }
494
495 ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
496 r4_do_attrcache(vp, garp, newnode, t, cr, index);
497 ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
498
499 return (vp);
500 }
501
502 /*
503 * Find or create a vnode for the given filehandle, filesystem, parent, and
504 * name. The reference to nm is consumed, so the caller must first do an
505 * fn_hold() if it wants to continue using nm after this call.
506 */
507 vnode_t *
makenfs4node(nfs4_sharedfh_t * fh,nfs4_ga_res_t * garp,struct vfs * vfsp,hrtime_t t,cred_t * cr,vnode_t * dvp,nfs4_fname_t * nm)508 makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
509 hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
510 {
511 vnode_t *vp;
512 int newnode;
513 int index;
514 mntinfo4_t *mi = VFTOMI4(vfsp);
515 int had_badfh = 0;
516 rnode4_t *rp;
517
518 ASSERT(dvp != NULL);
519
520 fh = badrootfh_check(fh, nm, mi, &had_badfh);
521
522 index = rtable4hash(fh);
523 rw_enter(&rtable4[index].r_lock, RW_READER);
524
525 /*
526 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
527 */
528 vp = make_rnode4(fh, &rtable4[index], vfsp, nfs4_vnodeops,
529 nfs4_putapage, &newnode, cr);
530
531 rp = VTOR4(vp);
532 sv_activate(&vp, dvp, &nm, newnode);
533 if (dvp->v_flag & V_XATTRDIR) {
534 mutex_enter(&rp->r_statelock);
535 rp->r_flags |= R4ISXATTR;
536 mutex_exit(&rp->r_statelock);
537 }
538
539 /* if getting a bad file handle, do not cache the attributes. */
540 if (had_badfh) {
541 rw_exit(&rtable4[index].r_lock);
542 return (vp);
543 }
544
545 ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
546 r4_do_attrcache(vp, garp, newnode, t, cr, index);
547 ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
548
549 return (vp);
550 }
551
552 /*
553 * Hash on address of filehandle object.
554 * XXX totally untuned.
555 */
556
557 int
rtable4hash(nfs4_sharedfh_t * fh)558 rtable4hash(nfs4_sharedfh_t *fh)
559 {
560 return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
561 }
562
563 /*
564 * Find or create the vnode for the given filehandle and filesystem.
565 * *newnode is set to zero if the vnode already existed; non-zero if it had
566 * to be created.
567 *
568 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
569 */
570
571 static vnode_t *
make_rnode4(nfs4_sharedfh_t * fh,r4hashq_t * rhtp,struct vfs * vfsp,struct vnodeops * vops,int (* putapage)(vnode_t *,page_t *,u_offset_t *,size_t *,int,cred_t *),int * newnode,cred_t * cr)572 make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
573 struct vnodeops *vops,
574 int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
575 int *newnode, cred_t *cr)
576 {
577 rnode4_t *rp;
578 rnode4_t *trp;
579 vnode_t *vp;
580 mntinfo4_t *mi;
581
582 ASSERT(RW_READ_HELD(&rhtp->r_lock));
583
584 mi = VFTOMI4(vfsp);
585
586 start:
587 if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
588 vp = RTOV4(rp);
589 *newnode = 0;
590 return (vp);
591 }
592 rw_exit(&rhtp->r_lock);
593
594 mutex_enter(&rp4freelist_lock);
595
596 if (rp4freelist != NULL && rnode4_new >= nrnode) {
597 rp = rp4freelist;
598 rp4_rmfree(rp);
599 mutex_exit(&rp4freelist_lock);
600
601 vp = RTOV4(rp);
602
603 if (rp->r_flags & R4HASHED) {
604 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
605 mutex_enter(&vp->v_lock);
606 if (vp->v_count > 1) {
607 VN_RELE_LOCKED(vp);
608 mutex_exit(&vp->v_lock);
609 rw_exit(&rp->r_hashq->r_lock);
610 rw_enter(&rhtp->r_lock, RW_READER);
611 goto start;
612 }
613 mutex_exit(&vp->v_lock);
614 rp4_rmhash_locked(rp);
615 rw_exit(&rp->r_hashq->r_lock);
616 }
617
618 r4inactive(rp, cr);
619
620 mutex_enter(&vp->v_lock);
621 if (vp->v_count > 1) {
622 VN_RELE_LOCKED(vp);
623 mutex_exit(&vp->v_lock);
624 rw_enter(&rhtp->r_lock, RW_READER);
625 goto start;
626 }
627 mutex_exit(&vp->v_lock);
628 vn_invalid(vp);
629
630 /*
631 * destroy old locks before bzero'ing and
632 * recreating the locks below.
633 */
634 uninit_rnode4(rp);
635
636 /*
637 * Make sure that if rnode is recycled then
638 * VFS count is decremented properly before
639 * reuse.
640 */
641 VFS_RELE(vp->v_vfsp);
642 vn_reinit(vp);
643 } else {
644 vnode_t *new_vp;
645
646 mutex_exit(&rp4freelist_lock);
647
648 rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
649 new_vp = vn_alloc(KM_SLEEP);
650
651 atomic_inc_ulong((ulong_t *)&rnode4_new);
652 #ifdef DEBUG
653 clstat4_debug.nrnode.value.ui64++;
654 #endif
655 vp = new_vp;
656 }
657
658 bzero(rp, sizeof (*rp));
659 rp->r_vnode = vp;
660 nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
661 nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
662 mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
663 mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
664 mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
665 mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
666 rp->created_v4 = 0;
667 list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
668 offsetof(nfs4_open_stream_t, os_node));
669 rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
670 rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
671 cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
672 cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
673 rp->r_flags = R4READDIRWATTR;
674 rp->r_fh = fh;
675 rp->r_hashq = rhtp;
676 sfh4_hold(rp->r_fh);
677 rp->r_server = mi->mi_curr_serv;
678 rp->r_deleg_type = OPEN_DELEGATE_NONE;
679 rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
680 nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);
681
682 rddir4_cache_create(rp);
683 rp->r_putapage = putapage;
684 vn_setops(vp, vops);
685 vp->v_data = (caddr_t)rp;
686 vp->v_vfsp = vfsp;
687 VFS_HOLD(vfsp);
688 vp->v_type = VNON;
689 vp->v_flag |= VMODSORT;
690 if (isrootfh(fh, rp))
691 vp->v_flag = VROOT;
692 vn_exists(vp);
693
694 /*
695 * There is a race condition if someone else
696 * alloc's the rnode while no locks are held, so we
697 * check again and recover if found.
698 */
699 rw_enter(&rhtp->r_lock, RW_WRITER);
700 if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
701 vp = RTOV4(trp);
702 *newnode = 0;
703 rw_exit(&rhtp->r_lock);
704 rp4_addfree(rp, cr);
705 rw_enter(&rhtp->r_lock, RW_READER);
706 return (vp);
707 }
708 rp4_addhash(rp);
709 *newnode = 1;
710 return (vp);
711 }
712
713 static void
uninit_rnode4(rnode4_t * rp)714 uninit_rnode4(rnode4_t *rp)
715 {
716 vnode_t *vp = RTOV4(rp);
717
718 ASSERT(rp != NULL);
719 ASSERT(vp != NULL);
720 ASSERT(vp->v_count == 1);
721 ASSERT(rp->r_count == 0);
722 ASSERT(rp->r_mapcnt == 0);
723 if (rp->r_flags & R4LODANGLERS) {
724 nfs4_flush_lock_owners(rp);
725 }
726 ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
727 ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
728 ASSERT(!(rp->r_flags & R4HASHED));
729 ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
730 nfs4_clear_open_streams(rp);
731 list_destroy(&rp->r_open_streams);
732
733 /*
734 * Destroy the rddir cache first since we need to grab the r_statelock.
735 */
736 mutex_enter(&rp->r_statelock);
737 rddir4_cache_destroy(rp);
738 mutex_exit(&rp->r_statelock);
739 sv_uninit(&rp->r_svnode);
740 sfh4_rele(&rp->r_fh);
741 nfs_rw_destroy(&rp->r_rwlock);
742 nfs_rw_destroy(&rp->r_lkserlock);
743 mutex_destroy(&rp->r_statelock);
744 mutex_destroy(&rp->r_statev4_lock);
745 mutex_destroy(&rp->r_os_lock);
746 cv_destroy(&rp->r_cv);
747 cv_destroy(&rp->r_commit.c_cv);
748 nfs_rw_destroy(&rp->r_deleg_recall_lock);
749 if (rp->r_flags & R4DELMAPLIST)
750 list_destroy(&rp->r_indelmap);
751 }
752
753 /*
754 * Put an rnode on the free list.
755 *
756 * Rnodes which were allocated above and beyond the normal limit
757 * are immediately freed.
758 */
759 void
rp4_addfree(rnode4_t * rp,cred_t * cr)760 rp4_addfree(rnode4_t *rp, cred_t *cr)
761 {
762 vnode_t *vp;
763 vnode_t *xattr;
764 struct vfs *vfsp;
765
766 vp = RTOV4(rp);
767 ASSERT(vp->v_count >= 1);
768 ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
769
770 /*
771 * If we have too many rnodes allocated and there are no
772 * references to this rnode, or if the rnode is no longer
773 * accessible by it does not reside in the hash queues,
774 * or if an i/o error occurred while writing to the file,
775 * then just free it instead of putting it on the rnode
776 * freelist.
777 */
778 vfsp = vp->v_vfsp;
779 if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
780 #ifdef DEBUG
781 (nfs4_rnode_nofreelist != 0) ||
782 #endif
783 rp->r_error || (rp->r_flags & R4RECOVERR) ||
784 (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
785 if (rp->r_flags & R4HASHED) {
786 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
787 mutex_enter(&vp->v_lock);
788 if (vp->v_count > 1) {
789 VN_RELE_LOCKED(vp);
790 mutex_exit(&vp->v_lock);
791 rw_exit(&rp->r_hashq->r_lock);
792 return;
793 }
794 mutex_exit(&vp->v_lock);
795 rp4_rmhash_locked(rp);
796 rw_exit(&rp->r_hashq->r_lock);
797 }
798
799 /*
800 * Make sure we don't have a delegation on this rnode
801 * before destroying it.
802 */
803 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
804 (void) nfs4delegreturn(rp,
805 NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
806 }
807
808 r4inactive(rp, cr);
809
810 /*
811 * Recheck the vnode reference count. We need to
812 * make sure that another reference has not been
813 * acquired while we were not holding v_lock. The
814 * rnode is not in the rnode hash queues; one
815 * way for a reference to have been acquired
816 * is for a VOP_PUTPAGE because the rnode was marked
817 * with R4DIRTY or for a modified page. This
818 * reference may have been acquired before our call
819 * to r4inactive. The i/o may have been completed,
820 * thus allowing r4inactive to complete, but the
821 * reference to the vnode may not have been released
822 * yet. In any case, the rnode can not be destroyed
823 * until the other references to this vnode have been
824 * released. The other references will take care of
825 * either destroying the rnode or placing it on the
826 * rnode freelist. If there are no other references,
827 * then the rnode may be safely destroyed.
828 */
829 mutex_enter(&vp->v_lock);
830 if (vp->v_count > 1) {
831 VN_RELE_LOCKED(vp);
832 mutex_exit(&vp->v_lock);
833 return;
834 }
835 mutex_exit(&vp->v_lock);
836
837 destroy_rnode4(rp);
838 return;
839 }
840
841 /*
842 * Lock the hash queue and then recheck the reference count
843 * to ensure that no other threads have acquired a reference
844 * to indicate that the rnode should not be placed on the
845 * freelist. If another reference has been acquired, then
846 * just release this one and let the other thread complete
847 * the processing of adding this rnode to the freelist.
848 */
849 again:
850 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
851
852 mutex_enter(&vp->v_lock);
853 if (vp->v_count > 1) {
854 VN_RELE_LOCKED(vp);
855 mutex_exit(&vp->v_lock);
856 rw_exit(&rp->r_hashq->r_lock);
857 return;
858 }
859 mutex_exit(&vp->v_lock);
860
861 /*
862 * Make sure we don't put an rnode with a delegation
863 * on the free list.
864 */
865 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
866 rw_exit(&rp->r_hashq->r_lock);
867 (void) nfs4delegreturn(rp,
868 NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
869 goto again;
870 }
871
872 /*
873 * Now that we have the hash queue lock, and we know there
874 * are not anymore references on the vnode, check to make
875 * sure there aren't any open streams still on the rnode.
876 * If so, drop the hash queue lock, remove the open streams,
877 * and recheck the v_count.
878 */
879 mutex_enter(&rp->r_os_lock);
880 if (list_head(&rp->r_open_streams) != NULL) {
881 mutex_exit(&rp->r_os_lock);
882 rw_exit(&rp->r_hashq->r_lock);
883 if (nfs_zone() != VTOMI4(vp)->mi_zone)
884 nfs4_clear_open_streams(rp);
885 else
886 (void) nfs4close_all(vp, cr);
887 goto again;
888 }
889 mutex_exit(&rp->r_os_lock);
890
891 /*
892 * Before we put it on the freelist, make sure there are no pages.
893 * If there are, flush and commit of all of the dirty and
894 * uncommitted pages, assuming the file system isn't read only.
895 */
896 if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
897 rw_exit(&rp->r_hashq->r_lock);
898 r4flushpages(rp, cr);
899 goto again;
900 }
901
902 /*
903 * Before we put it on the freelist, make sure there is no
904 * active xattr directory cached, the freelist will not
905 * have its entries r4inactive'd if there is still an active
906 * rnode, thus nothing in the freelist can hold another
907 * rnode active.
908 */
909 xattr = rp->r_xattr_dir;
910 rp->r_xattr_dir = NULL;
911
912 /*
913 * If there is no cached data or metadata for this file, then
914 * put the rnode on the front of the freelist so that it will
915 * be reused before other rnodes which may have cached data or
916 * metadata associated with them.
917 */
918 mutex_enter(&rp4freelist_lock);
919 if (rp4freelist == NULL) {
920 rp->r_freef = rp;
921 rp->r_freeb = rp;
922 rp4freelist = rp;
923 } else {
924 rp->r_freef = rp4freelist;
925 rp->r_freeb = rp4freelist->r_freeb;
926 rp4freelist->r_freeb->r_freef = rp;
927 rp4freelist->r_freeb = rp;
928 if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
929 rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
930 rp4freelist = rp;
931 }
932 mutex_exit(&rp4freelist_lock);
933
934 rw_exit(&rp->r_hashq->r_lock);
935
936 if (xattr)
937 VN_RELE(xattr);
938 }
939
940 /*
941 * Remove an rnode from the free list.
942 *
943 * The caller must be holding rp4freelist_lock and the rnode
944 * must be on the freelist.
945 */
946 static void
rp4_rmfree(rnode4_t * rp)947 rp4_rmfree(rnode4_t *rp)
948 {
949
950 ASSERT(MUTEX_HELD(&rp4freelist_lock));
951 ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
952
953 if (rp == rp4freelist) {
954 rp4freelist = rp->r_freef;
955 if (rp == rp4freelist)
956 rp4freelist = NULL;
957 }
958 rp->r_freeb->r_freef = rp->r_freef;
959 rp->r_freef->r_freeb = rp->r_freeb;
960
961 rp->r_freef = rp->r_freeb = NULL;
962 }
963
964 /*
965 * Put a rnode in the hash table.
966 *
967 * The caller must be holding the exclusive hash queue lock
968 */
969 void
rp4_addhash(rnode4_t * rp)970 rp4_addhash(rnode4_t *rp)
971 {
972 mntinfo4_t *mi;
973
974 ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
975 ASSERT(!(rp->r_flags & R4HASHED));
976
977 #ifdef DEBUG
978 r4_dup_check(rp, RTOV4(rp)->v_vfsp);
979 #endif
980
981 rp->r_hashf = rp->r_hashq->r_hashf;
982 rp->r_hashq->r_hashf = rp;
983 rp->r_hashb = (rnode4_t *)rp->r_hashq;
984 rp->r_hashf->r_hashb = rp;
985
986 mutex_enter(&rp->r_statelock);
987 rp->r_flags |= R4HASHED;
988 mutex_exit(&rp->r_statelock);
989
990 mi = VTOMI4(RTOV4(rp));
991 mutex_enter(&mi->mi_rnodes_lock);
992 list_insert_tail(&mi->mi_rnodes, rp);
993 mutex_exit(&mi->mi_rnodes_lock);
994 }
995
996 /*
997 * Remove a rnode from the hash table.
998 *
999 * The caller must be holding the hash queue lock.
1000 */
1001 void
rp4_rmhash_locked(rnode4_t * rp)1002 rp4_rmhash_locked(rnode4_t *rp)
1003 {
1004 mntinfo4_t *mi;
1005
1006 ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
1007 ASSERT(rp->r_flags & R4HASHED);
1008
1009 rp->r_hashb->r_hashf = rp->r_hashf;
1010 rp->r_hashf->r_hashb = rp->r_hashb;
1011
1012 mutex_enter(&rp->r_statelock);
1013 rp->r_flags &= ~R4HASHED;
1014 mutex_exit(&rp->r_statelock);
1015
1016 mi = VTOMI4(RTOV4(rp));
1017 mutex_enter(&mi->mi_rnodes_lock);
1018 if (list_link_active(&rp->r_mi_link))
1019 list_remove(&mi->mi_rnodes, rp);
1020 mutex_exit(&mi->mi_rnodes_lock);
1021 }
1022
1023 /*
1024 * Remove a rnode from the hash table.
1025 *
1026 * The caller must not be holding the hash queue lock.
1027 */
1028 void
rp4_rmhash(rnode4_t * rp)1029 rp4_rmhash(rnode4_t *rp)
1030 {
1031 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1032 rp4_rmhash_locked(rp);
1033 rw_exit(&rp->r_hashq->r_lock);
1034 }
1035
1036 /*
1037 * Lookup a rnode by fhandle. Ignores rnodes that had failed recovery.
1038 * Returns NULL if no match. If an rnode is returned, the reference count
1039 * on the master vnode is incremented.
1040 *
1041 * The caller must be holding the hash queue lock, either shared or exclusive.
1042 */
1043 rnode4_t *
r4find(r4hashq_t * rhtp,nfs4_sharedfh_t * fh,struct vfs * vfsp)1044 r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
1045 {
1046 rnode4_t *rp;
1047 vnode_t *vp;
1048
1049 ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
1050
1051 for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
1052 vp = RTOV4(rp);
1053 if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {
1054
1055 mutex_enter(&rp->r_statelock);
1056 if (rp->r_flags & R4RECOVERR) {
1057 mutex_exit(&rp->r_statelock);
1058 continue;
1059 }
1060 mutex_exit(&rp->r_statelock);
1061 #ifdef DEBUG
1062 r4_dup_check(rp, vfsp);
1063 #endif
1064 if (rp->r_freef != NULL) {
1065 mutex_enter(&rp4freelist_lock);
1066 /*
1067 * If the rnode is on the freelist,
1068 * then remove it and use that reference
1069 * as the new reference. Otherwise,
1070 * need to increment the reference count.
1071 */
1072 if (rp->r_freef != NULL) {
1073 rp4_rmfree(rp);
1074 mutex_exit(&rp4freelist_lock);
1075 } else {
1076 mutex_exit(&rp4freelist_lock);
1077 VN_HOLD(vp);
1078 }
1079 } else
1080 VN_HOLD(vp);
1081
1082 /*
1083 * if root vnode, set v_flag to indicate that
1084 */
1085 if (isrootfh(fh, rp)) {
1086 if (!(vp->v_flag & VROOT)) {
1087 mutex_enter(&vp->v_lock);
1088 vp->v_flag |= VROOT;
1089 mutex_exit(&vp->v_lock);
1090 }
1091 }
1092 return (rp);
1093 }
1094 }
1095 return (NULL);
1096 }
1097
1098 /*
1099 * Lookup an rnode by fhandle. Just a wrapper for r4find()
1100 * that assumes the caller hasn't already got the lock
1101 * on the hash bucket.
1102 */
1103 rnode4_t *
r4find_unlocked(nfs4_sharedfh_t * fh,struct vfs * vfsp)1104 r4find_unlocked(nfs4_sharedfh_t *fh, struct vfs *vfsp)
1105 {
1106 rnode4_t *rp;
1107 int index;
1108
1109 index = rtable4hash(fh);
1110 rw_enter(&rtable4[index].r_lock, RW_READER);
1111 rp = r4find(&rtable4[index], fh, vfsp);
1112 rw_exit(&rtable4[index].r_lock);
1113
1114 return (rp);
1115 }
1116
1117 /*
1118 * Return 1 if there is an active vnode belonging to this vfs in the
1119 * rtable4 cache.
1120 *
1121 * Several of these checks are done without holding the usual
1122 * locks. This is safe because destroy_rtable4(), rp4_addfree(),
1123 * etc. will redo the necessary checks before actually destroying
1124 * any rnodes.
1125 */
1126 int
check_rtable4(struct vfs * vfsp)1127 check_rtable4(struct vfs *vfsp)
1128 {
1129 rnode4_t *rp;
1130 vnode_t *vp;
1131 mntinfo4_t *mi;
1132
1133 ASSERT(vfsp != NULL);
1134 mi = VFTOMI4(vfsp);
1135
1136 mutex_enter(&mi->mi_rnodes_lock);
1137 for (rp = list_head(&mi->mi_rnodes); rp != NULL;
1138 rp = list_next(&mi->mi_rnodes, rp)) {
1139 vp = RTOV4(rp);
1140
1141 if (rp->r_freef == NULL ||
1142 (nfs4_has_pages(vp) && (rp->r_flags & R4DIRTY)) ||
1143 rp->r_count > 0) {
1144 mutex_exit(&mi->mi_rnodes_lock);
1145 return (1);
1146 }
1147 }
1148 mutex_exit(&mi->mi_rnodes_lock);
1149
1150 return (0);
1151 }
1152
1153 /*
1154 * Destroy inactive vnodes from the hash queues which
1155 * belong to this vfs. All of the vnodes should be inactive.
1156 * It is essential that we destroy all rnodes in case of
1157 * forced unmount as well as in normal unmount case.
1158 */
1159
1160 void
destroy_rtable4(struct vfs * vfsp,cred_t * cr)1161 destroy_rtable4(struct vfs *vfsp, cred_t *cr)
1162 {
1163 rnode4_t *rp;
1164 mntinfo4_t *mi;
1165
1166 ASSERT(vfsp != NULL);
1167
1168 mi = VFTOMI4(vfsp);
1169
1170 mutex_enter(&rp4freelist_lock);
1171 mutex_enter(&mi->mi_rnodes_lock);
1172 while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) {
1173 /*
1174 * If the rnode is no longer on the freelist it is not
1175 * ours and it will be handled by some other thread, so
1176 * skip it.
1177 */
1178 if (rp->r_freef == NULL)
1179 continue;
1180 mutex_exit(&mi->mi_rnodes_lock);
1181
1182 rp4_rmfree(rp);
1183 mutex_exit(&rp4freelist_lock);
1184
1185 rp4_rmhash(rp);
1186
1187 /*
1188 * This call to rp4_addfree will end up destroying the
1189 * rnode, but in a safe way with the appropriate set
1190 * of checks done.
1191 */
1192 rp4_addfree(rp, cr);
1193
1194 mutex_enter(&rp4freelist_lock);
1195 mutex_enter(&mi->mi_rnodes_lock);
1196 }
1197 mutex_exit(&mi->mi_rnodes_lock);
1198 mutex_exit(&rp4freelist_lock);
1199 }
1200
1201 /*
1202 * This routine destroys all the resources of an rnode
1203 * and finally the rnode itself.
1204 */
1205 static void
destroy_rnode4(rnode4_t * rp)1206 destroy_rnode4(rnode4_t *rp)
1207 {
1208 vnode_t *vp;
1209 vfs_t *vfsp;
1210
1211 ASSERT(rp->r_deleg_type == OPEN_DELEGATE_NONE);
1212
1213 vp = RTOV4(rp);
1214 vfsp = vp->v_vfsp;
1215
1216 uninit_rnode4(rp);
1217 atomic_dec_ulong((ulong_t *)&rnode4_new);
1218 #ifdef DEBUG
1219 clstat4_debug.nrnode.value.ui64--;
1220 #endif
1221 kmem_cache_free(rnode4_cache, rp);
1222 vn_invalid(vp);
1223 vn_free(vp);
1224 VFS_RELE(vfsp);
1225 }
1226
1227 /*
1228 * Invalidate the attributes on all rnodes forcing the next getattr
1229 * to go over the wire. Used to flush stale uid and gid mappings.
1230 * Maybe done on a per vfsp, or all rnodes (vfsp == NULL)
1231 */
1232 void
nfs4_rnode_invalidate(struct vfs * vfsp)1233 nfs4_rnode_invalidate(struct vfs *vfsp)
1234 {
1235 int index;
1236 rnode4_t *rp;
1237 vnode_t *vp;
1238
1239 /*
1240 * Walk the hash queues looking for rnodes.
1241 */
1242 for (index = 0; index < rtable4size; index++) {
1243 rw_enter(&rtable4[index].r_lock, RW_READER);
1244 for (rp = rtable4[index].r_hashf;
1245 rp != (rnode4_t *)(&rtable4[index]);
1246 rp = rp->r_hashf) {
1247 vp = RTOV4(rp);
1248 if (vfsp != NULL && vp->v_vfsp != vfsp)
1249 continue;
1250
1251 if (!mutex_tryenter(&rp->r_statelock))
1252 continue;
1253
1254 /*
1255 * Expire the attributes by resetting the change
1256 * and attr timeout.
1257 */
1258 rp->r_change = 0;
1259 PURGE_ATTRCACHE4_LOCKED(rp);
1260 mutex_exit(&rp->r_statelock);
1261 }
1262 rw_exit(&rtable4[index].r_lock);
1263 }
1264 }
1265
1266 /*
1267 * Flush all vnodes in this (or every) vfs.
1268 * Used by nfs_sync and by nfs_unmount.
1269 */
1270 void
r4flush(struct vfs * vfsp,cred_t * cr)1271 r4flush(struct vfs *vfsp, cred_t *cr)
1272 {
1273 int index;
1274 rnode4_t *rp;
1275 vnode_t *vp, **vplist;
1276 long num, cnt;
1277
1278 /*
1279 * Check to see whether there is anything to do.
1280 */
1281 num = rnode4_new;
1282 if (num == 0)
1283 return;
1284
1285 /*
1286 * Allocate a slot for all currently active rnodes on the
1287 * supposition that they all may need flushing.
1288 */
1289 vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
1290 cnt = 0;
1291
1292 /*
1293 * If the vfs is known we can do fast path by iterating all rnodes that
1294 * belongs to this vfs. This is much faster than the traditional way
1295 * of iterating rtable4 (below) in a case there is a lot of rnodes that
1296 * does not belong to our vfs.
1297 */
1298 if (vfsp != NULL) {
1299 mntinfo4_t *mi = VFTOMI4(vfsp);
1300
1301 mutex_enter(&mi->mi_rnodes_lock);
1302 for (rp = list_head(&mi->mi_rnodes); rp != NULL;
1303 rp = list_next(&mi->mi_rnodes, rp)) {
1304 vp = RTOV4(rp);
1305 /*
1306 * Don't bother sync'ing a vp if it
1307 * is part of virtual swap device or
1308 * if VFS is read-only
1309 */
1310 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1311 continue;
1312 /*
1313 * If the vnode has pages and is marked as either dirty
1314 * or mmap'd, hold and add this vnode to the list of
1315 * vnodes to flush.
1316 */
1317 ASSERT(vp->v_vfsp == vfsp);
1318 if (nfs4_has_pages(vp) &&
1319 ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
1320 VN_HOLD(vp);
1321 vplist[cnt++] = vp;
1322 if (cnt == num) {
1323 /*
1324 * The vplist is full because there is
1325 * too many rnodes. We are done for
1326 * now.
1327 */
1328 break;
1329 }
1330 }
1331 }
1332 mutex_exit(&mi->mi_rnodes_lock);
1333
1334 goto done;
1335 }
1336
1337 ASSERT(vfsp == NULL);
1338
1339 /*
1340 * Walk the hash queues looking for rnodes with page
1341 * lists associated with them. Make a list of these
1342 * files.
1343 */
1344 for (index = 0; index < rtable4size; index++) {
1345 rw_enter(&rtable4[index].r_lock, RW_READER);
1346 for (rp = rtable4[index].r_hashf;
1347 rp != (rnode4_t *)(&rtable4[index]);
1348 rp = rp->r_hashf) {
1349 vp = RTOV4(rp);
1350 /*
1351 * Don't bother sync'ing a vp if it
1352 * is part of virtual swap device or
1353 * if VFS is read-only
1354 */
1355 if (IS_SWAPVP(vp) || vn_is_readonly(vp))
1356 continue;
1357 /*
1358 * If the vnode has pages and is marked as either dirty
1359 * or mmap'd, hold and add this vnode to the list of
1360 * vnodes to flush.
1361 */
1362 if (nfs4_has_pages(vp) &&
1363 ((rp->r_flags & R4DIRTY) || rp->r_mapcnt > 0)) {
1364 VN_HOLD(vp);
1365 vplist[cnt++] = vp;
1366 if (cnt == num) {
1367 rw_exit(&rtable4[index].r_lock);
1368 /*
1369 * The vplist is full because there is
1370 * too many rnodes. We are done for
1371 * now.
1372 */
1373 goto done;
1374 }
1375 }
1376 }
1377 rw_exit(&rtable4[index].r_lock);
1378 }
1379
1380 done:
1381
1382 /*
1383 * Flush and release all of the files on the list.
1384 */
1385 while (cnt-- > 0) {
1386 vp = vplist[cnt];
1387 (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
1388 VN_RELE(vp);
1389 }
1390
1391 /*
1392 * Free the space allocated to hold the list.
1393 */
1394 kmem_free(vplist, num * sizeof (*vplist));
1395 }
1396
1397 int
nfs4_free_data_reclaim(rnode4_t * rp)1398 nfs4_free_data_reclaim(rnode4_t *rp)
1399 {
1400 char *contents;
1401 vnode_t *xattr;
1402 int size;
1403 vsecattr_t *vsp;
1404 int freed;
1405 bool_t rdc = FALSE;
1406
1407 /*
1408 * Free any held caches which may
1409 * be associated with this rnode.
1410 */
1411 mutex_enter(&rp->r_statelock);
1412 if (rp->r_dir != NULL)
1413 rdc = TRUE;
1414 contents = rp->r_symlink.contents;
1415 size = rp->r_symlink.size;
1416 rp->r_symlink.contents = NULL;
1417 vsp = rp->r_secattr;
1418 rp->r_secattr = NULL;
1419 xattr = rp->r_xattr_dir;
1420 rp->r_xattr_dir = NULL;
1421 mutex_exit(&rp->r_statelock);
1422
1423 /*
1424 * Free the access cache entries.
1425 */
1426 freed = nfs4_access_purge_rp(rp);
1427
1428 if (rdc == FALSE && contents == NULL && vsp == NULL && xattr == NULL)
1429 return (freed);
1430
1431 /*
1432 * Free the readdir cache entries, incompletely if we can't block.
1433 */
1434 nfs4_purge_rddir_cache(RTOV4(rp));
1435
1436 /*
1437 * Free the symbolic link cache.
1438 */
1439 if (contents != NULL) {
1440
1441 kmem_free((void *)contents, size);
1442 }
1443
1444 /*
1445 * Free any cached ACL.
1446 */
1447 if (vsp != NULL)
1448 nfs4_acl_free_cache(vsp);
1449
1450 /*
1451 * Release the xattr directory vnode
1452 */
1453 if (xattr != NULL)
1454 VN_RELE(xattr);
1455
1456 return (1);
1457 }
1458
1459 static int
nfs4_active_data_reclaim(rnode4_t * rp)1460 nfs4_active_data_reclaim(rnode4_t *rp)
1461 {
1462 char *contents;
1463 vnode_t *xattr = NULL;
1464 int size;
1465 vsecattr_t *vsp;
1466 int freed;
1467 bool_t rdc = FALSE;
1468
1469 /*
1470 * Free any held credentials and caches which
1471 * may be associated with this rnode.
1472 */
1473 if (!mutex_tryenter(&rp->r_statelock))
1474 return (0);
1475 contents = rp->r_symlink.contents;
1476 size = rp->r_symlink.size;
1477 rp->r_symlink.contents = NULL;
1478 vsp = rp->r_secattr;
1479 rp->r_secattr = NULL;
1480 if (rp->r_dir != NULL)
1481 rdc = TRUE;
1482 /*
1483 * To avoid a deadlock, do not free r_xattr_dir cache if it is hashed
1484 * on the same r_hashq queue. We are not mandated to free all caches.
1485 * VN_RELE(rp->r_xattr_dir) will be done sometime later - e.g. when the
1486 * rnode 'rp' is freed or put on the free list.
1487 *
1488 * We will retain NFS4_XATTR_DIR_NOTSUPP because:
1489 * - it has no associated rnode4_t (its v_data is NULL),
1490 * - it is preallocated statically and will never go away,
1491 * so we cannot save anything by releasing it.
1492 */
1493 if (rp->r_xattr_dir && rp->r_xattr_dir != NFS4_XATTR_DIR_NOTSUPP &&
1494 VTOR4(rp->r_xattr_dir)->r_hashq != rp->r_hashq) {
1495 xattr = rp->r_xattr_dir;
1496 rp->r_xattr_dir = NULL;
1497 }
1498 mutex_exit(&rp->r_statelock);
1499
1500 /*
1501 * Free the access cache entries.
1502 */
1503 freed = nfs4_access_purge_rp(rp);
1504
1505 if (contents == NULL && vsp == NULL && rdc == FALSE && xattr == NULL)
1506 return (freed);
1507
1508 /*
1509 * Free the symbolic link cache.
1510 */
1511 if (contents != NULL) {
1512
1513 kmem_free((void *)contents, size);
1514 }
1515
1516 /*
1517 * Free any cached ACL.
1518 */
1519 if (vsp != NULL)
1520 nfs4_acl_free_cache(vsp);
1521
1522 nfs4_purge_rddir_cache(RTOV4(rp));
1523
1524 /*
1525 * Release the xattr directory vnode
1526 */
1527 if (xattr != NULL)
1528 VN_RELE(xattr);
1529
1530 return (1);
1531 }
1532
1533 static int
nfs4_free_reclaim(void)1534 nfs4_free_reclaim(void)
1535 {
1536 int freed;
1537 rnode4_t *rp;
1538
1539 #ifdef DEBUG
1540 clstat4_debug.f_reclaim.value.ui64++;
1541 #endif
1542 freed = 0;
1543 mutex_enter(&rp4freelist_lock);
1544 rp = rp4freelist;
1545 if (rp != NULL) {
1546 do {
1547 if (nfs4_free_data_reclaim(rp))
1548 freed = 1;
1549 } while ((rp = rp->r_freef) != rp4freelist);
1550 }
1551 mutex_exit(&rp4freelist_lock);
1552 return (freed);
1553 }
1554
1555 static int
nfs4_active_reclaim(void)1556 nfs4_active_reclaim(void)
1557 {
1558 int freed;
1559 int index;
1560 rnode4_t *rp;
1561
1562 #ifdef DEBUG
1563 clstat4_debug.a_reclaim.value.ui64++;
1564 #endif
1565 freed = 0;
1566 for (index = 0; index < rtable4size; index++) {
1567 rw_enter(&rtable4[index].r_lock, RW_READER);
1568 for (rp = rtable4[index].r_hashf;
1569 rp != (rnode4_t *)(&rtable4[index]);
1570 rp = rp->r_hashf) {
1571 if (nfs4_active_data_reclaim(rp))
1572 freed = 1;
1573 }
1574 rw_exit(&rtable4[index].r_lock);
1575 }
1576 return (freed);
1577 }
1578
1579 static int
nfs4_rnode_reclaim(void)1580 nfs4_rnode_reclaim(void)
1581 {
1582 int freed;
1583 rnode4_t *rp;
1584 vnode_t *vp;
1585
1586 #ifdef DEBUG
1587 clstat4_debug.r_reclaim.value.ui64++;
1588 #endif
1589 freed = 0;
1590 mutex_enter(&rp4freelist_lock);
1591 while ((rp = rp4freelist) != NULL) {
1592 rp4_rmfree(rp);
1593 mutex_exit(&rp4freelist_lock);
1594 if (rp->r_flags & R4HASHED) {
1595 vp = RTOV4(rp);
1596 rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
1597 mutex_enter(&vp->v_lock);
1598 if (vp->v_count > 1) {
1599 VN_RELE_LOCKED(vp);
1600 mutex_exit(&vp->v_lock);
1601 rw_exit(&rp->r_hashq->r_lock);
1602 mutex_enter(&rp4freelist_lock);
1603 continue;
1604 }
1605 mutex_exit(&vp->v_lock);
1606 rp4_rmhash_locked(rp);
1607 rw_exit(&rp->r_hashq->r_lock);
1608 }
1609 /*
1610 * This call to rp_addfree will end up destroying the
1611 * rnode, but in a safe way with the appropriate set
1612 * of checks done.
1613 */
1614 rp4_addfree(rp, CRED());
1615 mutex_enter(&rp4freelist_lock);
1616 }
1617 mutex_exit(&rp4freelist_lock);
1618 return (freed);
1619 }
1620
/*
 * Reclaim callback registered for rnode4_cache.  Tries the reclaim
 * stages in order (freelist caches, then active rnode caches, then
 * whole rnodes) and stops after the first stage that reports having
 * freed something.  The argument is unused.
 */
/*ARGSUSED*/
static void
nfs4_reclaim(void *cdrarg)
{
#ifdef DEBUG
	clstat4_debug.reclaim.value.ui64++;
#endif
	if (!nfs4_free_reclaim() && !nfs4_active_reclaim())
		(void) nfs4_rnode_reclaim();
}
1636
1637 /*
1638 * Returns the clientid4 to use for the given mntinfo4. Note that the
1639 * clientid can change if the caller drops mi_recovlock.
1640 */
1641
1642 clientid4
mi2clientid(mntinfo4_t * mi)1643 mi2clientid(mntinfo4_t *mi)
1644 {
1645 nfs4_server_t *sp;
1646 clientid4 clientid = 0;
1647
1648 /* this locks down sp if it is found */
1649 sp = find_nfs4_server(mi);
1650 if (sp != NULL) {
1651 clientid = sp->clientid;
1652 mutex_exit(&sp->s_lock);
1653 nfs4_server_rele(sp);
1654 }
1655 return (clientid);
1656 }
1657
1658 /*
1659 * Return the current lease time for the server associated with the given
1660 * file. Note that the lease time could change immediately after this
1661 * call.
1662 */
1663
1664 time_t
r2lease_time(rnode4_t * rp)1665 r2lease_time(rnode4_t *rp)
1666 {
1667 nfs4_server_t *sp;
1668 time_t lease_time;
1669 mntinfo4_t *mi = VTOMI4(RTOV4(rp));
1670
1671 (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER, 0);
1672
1673 /* this locks down sp if it is found */
1674 sp = find_nfs4_server(VTOMI4(RTOV4(rp)));
1675
1676 if (VTOMI4(RTOV4(rp))->mi_vfsp->vfs_flag & VFS_UNMOUNTED) {
1677 if (sp != NULL) {
1678 mutex_exit(&sp->s_lock);
1679 nfs4_server_rele(sp);
1680 }
1681 nfs_rw_exit(&mi->mi_recovlock);
1682 return (1); /* 1 second */
1683 }
1684
1685 ASSERT(sp != NULL);
1686
1687 lease_time = sp->s_lease_time;
1688
1689 mutex_exit(&sp->s_lock);
1690 nfs4_server_rele(sp);
1691 nfs_rw_exit(&mi->mi_recovlock);
1692
1693 return (lease_time);
1694 }
1695
1696 /*
1697 * Return a list with information about all the known open instances for
1698 * a filesystem. The caller must call r4releopenlist() when done with the
1699 * list.
1700 *
1701 * We are safe at looking at os_valid and os_pending_close across dropping
1702 * the 'os_sync_lock' to count up the number of open streams and then
1703 * allocate memory for the osp list due to:
1704 * -Looking at os_pending_close is safe since this routine is
1705 * only called via recovery, and os_pending_close can only be set via
1706 * a non-recovery operation (which are all blocked when recovery
1707 * is active).
1708 *
1709 * -Examining os_valid is safe since non-recovery operations, which
1710 * could potentially switch os_valid to 0, are blocked (via
1711 * nfs4_start_fop) and recovery is single-threaded per mntinfo4_t
1712 * (which means we are the only recovery thread potentially acting
1713 * on this open stream).
1714 */
1715
1716 nfs4_opinst_t *
r4mkopenlist(mntinfo4_t * mi)1717 r4mkopenlist(mntinfo4_t *mi)
1718 {
1719 nfs4_opinst_t *reopenlist, *rep;
1720 rnode4_t *rp;
1721 vnode_t *vp;
1722 vfs_t *vfsp = mi->mi_vfsp;
1723 int numosp;
1724 nfs4_open_stream_t *osp;
1725 int index;
1726 open_delegation_type4 dtype;
1727 int hold_vnode;
1728
1729 reopenlist = NULL;
1730
1731 for (index = 0; index < rtable4size; index++) {
1732 rw_enter(&rtable4[index].r_lock, RW_READER);
1733 for (rp = rtable4[index].r_hashf;
1734 rp != (rnode4_t *)(&rtable4[index]);
1735 rp = rp->r_hashf) {
1736
1737 vp = RTOV4(rp);
1738 if (vp->v_vfsp != vfsp)
1739 continue;
1740 hold_vnode = 0;
1741
1742 mutex_enter(&rp->r_os_lock);
1743
1744 /* Count the number of valid open_streams of the file */
1745 numosp = 0;
1746 for (osp = list_head(&rp->r_open_streams); osp != NULL;
1747 osp = list_next(&rp->r_open_streams, osp)) {
1748 mutex_enter(&osp->os_sync_lock);
1749 if (osp->os_valid && !osp->os_pending_close)
1750 numosp++;
1751 mutex_exit(&osp->os_sync_lock);
1752 }
1753
1754 /* Fill in the valid open streams per vp */
1755 if (numosp > 0) {
1756 int j;
1757
1758 hold_vnode = 1;
1759
1760 /*
1761 * Add a new open instance to the list
1762 */
1763 rep = kmem_zalloc(sizeof (*reopenlist),
1764 KM_SLEEP);
1765 rep->re_next = reopenlist;
1766 reopenlist = rep;
1767
1768 rep->re_vp = vp;
1769 rep->re_osp = kmem_zalloc(
1770 numosp * sizeof (*(rep->re_osp)),
1771 KM_SLEEP);
1772 rep->re_numosp = numosp;
1773
1774 j = 0;
1775 for (osp = list_head(&rp->r_open_streams);
1776 osp != NULL;
1777 osp = list_next(&rp->r_open_streams, osp)) {
1778
1779 mutex_enter(&osp->os_sync_lock);
1780 if (osp->os_valid &&
1781 !osp->os_pending_close) {
1782 osp->os_ref_count++;
1783 rep->re_osp[j] = osp;
1784 j++;
1785 }
1786 mutex_exit(&osp->os_sync_lock);
1787 }
1788 /*
1789 * Assuming valid osp(s) stays valid between
1790 * the time obtaining j and numosp.
1791 */
1792 ASSERT(j == numosp);
1793 }
1794
1795 mutex_exit(&rp->r_os_lock);
1796 /* do this here to keep v_lock > r_os_lock */
1797 if (hold_vnode)
1798 VN_HOLD(vp);
1799 mutex_enter(&rp->r_statev4_lock);
1800 if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
1801 /*
1802 * If this rnode holds a delegation,
1803 * but if there are no valid open streams,
1804 * then just discard the delegation
1805 * without doing delegreturn.
1806 */
1807 if (numosp > 0)
1808 rp->r_deleg_needs_recovery =
1809 rp->r_deleg_type;
1810 }
1811 /* Save the delegation type for use outside the lock */
1812 dtype = rp->r_deleg_type;
1813 mutex_exit(&rp->r_statev4_lock);
1814
1815 /*
1816 * If we have a delegation then get rid of it.
1817 * We've set rp->r_deleg_needs_recovery so we have
1818 * enough information to recover.
1819 */
1820 if (dtype != OPEN_DELEGATE_NONE) {
1821 (void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
1822 }
1823 }
1824 rw_exit(&rtable4[index].r_lock);
1825 }
1826 return (reopenlist);
1827 }
1828
1829 /*
1830 * Given a filesystem id, check to see if any rnodes
1831 * within this fsid reside in the rnode cache, other
1832 * than one we know about.
1833 *
1834 * Return 1 if an rnode is found, 0 otherwise
1835 */
1836 int
r4find_by_fsid(mntinfo4_t * mi,fattr4_fsid * moved_fsid)1837 r4find_by_fsid(mntinfo4_t *mi, fattr4_fsid *moved_fsid)
1838 {
1839 rnode4_t *rp;
1840 vnode_t *vp;
1841 vfs_t *vfsp = mi->mi_vfsp;
1842 fattr4_fsid *fsid;
1843 int index, found = 0;
1844
1845 for (index = 0; index < rtable4size; index++) {
1846 rw_enter(&rtable4[index].r_lock, RW_READER);
1847 for (rp = rtable4[index].r_hashf;
1848 rp != (rnode4_t *)(&rtable4[index]);
1849 rp = rp->r_hashf) {
1850
1851 vp = RTOV4(rp);
1852 if (vp->v_vfsp != vfsp)
1853 continue;
1854
1855 /*
1856 * XXX there might be a case where a
1857 * replicated fs may have the same fsid
1858 * across two different servers. This
1859 * check isn't good enough in that case
1860 */
1861 fsid = &rp->r_srv_fsid;
1862 if (FATTR4_FSID_EQ(moved_fsid, fsid)) {
1863 found = 1;
1864 break;
1865 }
1866 }
1867 rw_exit(&rtable4[index].r_lock);
1868
1869 if (found)
1870 break;
1871 }
1872 return (found);
1873 }
1874
1875 /*
1876 * Release the list of open instance references.
1877 */
1878
1879 void
r4releopenlist(nfs4_opinst_t * reopenp)1880 r4releopenlist(nfs4_opinst_t *reopenp)
1881 {
1882 nfs4_opinst_t *rep, *next;
1883 int i;
1884
1885 for (rep = reopenp; rep; rep = next) {
1886 next = rep->re_next;
1887
1888 for (i = 0; i < rep->re_numosp; i++)
1889 open_stream_rele(rep->re_osp[i], VTOR4(rep->re_vp));
1890
1891 VN_RELE(rep->re_vp);
1892 kmem_free(rep->re_osp,
1893 rep->re_numosp * sizeof (*(rep->re_osp)));
1894
1895 kmem_free(rep, sizeof (*rep));
1896 }
1897 }
1898
1899 int
nfs4_rnode_init(void)1900 nfs4_rnode_init(void)
1901 {
1902 ulong_t nrnode4_max;
1903 int i;
1904
1905 /*
1906 * Compute the size of the rnode4 hash table
1907 */
1908 if (nrnode <= 0)
1909 nrnode = ncsize;
1910 nrnode4_max =
1911 (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode4));
1912 if (nrnode > nrnode4_max || (nrnode == 0 && ncsize == 0)) {
1913 zcmn_err(GLOBAL_ZONEID, CE_NOTE,
1914 "!setting nrnode to max value of %ld", nrnode4_max);
1915 nrnode = nrnode4_max;
1916 }
1917 rtable4size = 1 << highbit(nrnode / rnode4_hashlen);
1918 rtable4mask = rtable4size - 1;
1919
1920 /*
1921 * Allocate and initialize the hash buckets
1922 */
1923 rtable4 = kmem_alloc(rtable4size * sizeof (*rtable4), KM_SLEEP);
1924 for (i = 0; i < rtable4size; i++) {
1925 rtable4[i].r_hashf = (rnode4_t *)(&rtable4[i]);
1926 rtable4[i].r_hashb = (rnode4_t *)(&rtable4[i]);
1927 rw_init(&rtable4[i].r_lock, NULL, RW_DEFAULT, NULL);
1928 }
1929
1930 rnode4_cache = kmem_cache_create("rnode4_cache", sizeof (rnode4_t),
1931 0, NULL, NULL, nfs4_reclaim, NULL, NULL, 0);
1932
1933 return (0);
1934 }
1935
1936 int
nfs4_rnode_fini(void)1937 nfs4_rnode_fini(void)
1938 {
1939 int i;
1940
1941 /*
1942 * Deallocate the rnode hash queues
1943 */
1944 kmem_cache_destroy(rnode4_cache);
1945
1946 for (i = 0; i < rtable4size; i++)
1947 rw_destroy(&rtable4[i].r_lock);
1948
1949 kmem_free(rtable4, rtable4size * sizeof (*rtable4));
1950
1951 return (0);
1952 }
1953
1954 /*
1955 * Return non-zero if the given filehandle refers to the root filehandle
1956 * for the given rnode.
1957 */
1958
1959 static int
isrootfh(nfs4_sharedfh_t * fh,rnode4_t * rp)1960 isrootfh(nfs4_sharedfh_t *fh, rnode4_t *rp)
1961 {
1962 int isroot;
1963
1964 isroot = 0;
1965 if (SFH4_SAME(VTOMI4(RTOV4(rp))->mi_rootfh, fh))
1966 isroot = 1;
1967
1968 return (isroot);
1969 }
1970
1971 /*
1972 * The r4_stub_* routines assume that the rnode is newly activated, and
1973 * that the caller either holds the hash bucket r_lock for this rnode as
1974 * RW_WRITER, or holds r_statelock.
1975 */
1976 static void
r4_stub_set(rnode4_t * rp,nfs4_stub_type_t type)1977 r4_stub_set(rnode4_t *rp, nfs4_stub_type_t type)
1978 {
1979 vnode_t *vp = RTOV4(rp);
1980 krwlock_t *hash_lock = &rp->r_hashq->r_lock;
1981
1982 ASSERT(RW_WRITE_HELD(hash_lock) || MUTEX_HELD(&rp->r_statelock));
1983
1984 rp->r_stub_type = type;
1985
1986 /*
1987 * Safely switch this vnode to the trigger vnodeops.
1988 *
1989 * Currently, we don't ever switch a trigger vnode back to using
1990 * "regular" v4 vnodeops. NFS4_STUB_NONE is only used to note that
1991 * a new v4 object is not a trigger, and it will already have the
1992 * correct v4 vnodeops by default. So, no "else" case required here.
1993 */
1994 if (type != NFS4_STUB_NONE)
1995 vn_setops(vp, nfs4_trigger_vnodeops);
1996 }
1997
1998 void
r4_stub_mirrormount(rnode4_t * rp)1999 r4_stub_mirrormount(rnode4_t *rp)
2000 {
2001 r4_stub_set(rp, NFS4_STUB_MIRRORMOUNT);
2002 }
2003
2004 void
r4_stub_referral(rnode4_t * rp)2005 r4_stub_referral(rnode4_t *rp)
2006 {
2007 DTRACE_PROBE1(nfs4clnt__func__referral__moved,
2008 vnode_t *, RTOV4(rp));
2009 r4_stub_set(rp, NFS4_STUB_REFERRAL);
2010 }
2011
2012 void
r4_stub_none(rnode4_t * rp)2013 r4_stub_none(rnode4_t *rp)
2014 {
2015 r4_stub_set(rp, NFS4_STUB_NONE);
2016 }
2017
2018 #ifdef DEBUG
2019
2020 /*
2021 * Look in the rnode table for other rnodes that have the same filehandle.
2022 * Assume the lock is held for the hash chain of checkrp
2023 */
2024
2025 static void
r4_dup_check(rnode4_t * checkrp,vfs_t * vfsp)2026 r4_dup_check(rnode4_t *checkrp, vfs_t *vfsp)
2027 {
2028 rnode4_t *rp;
2029 vnode_t *tvp;
2030 nfs4_fhandle_t fh, fh2;
2031 int index;
2032
2033 if (!r4_check_for_dups)
2034 return;
2035
2036 ASSERT(RW_LOCK_HELD(&checkrp->r_hashq->r_lock));
2037
2038 sfh4_copyval(checkrp->r_fh, &fh);
2039
2040 for (index = 0; index < rtable4size; index++) {
2041
2042 if (&rtable4[index] != checkrp->r_hashq)
2043 rw_enter(&rtable4[index].r_lock, RW_READER);
2044
2045 for (rp = rtable4[index].r_hashf;
2046 rp != (rnode4_t *)(&rtable4[index]);
2047 rp = rp->r_hashf) {
2048
2049 if (rp == checkrp)
2050 continue;
2051
2052 tvp = RTOV4(rp);
2053 if (tvp->v_vfsp != vfsp)
2054 continue;
2055
2056 sfh4_copyval(rp->r_fh, &fh2);
2057 if (nfs4cmpfhandle(&fh, &fh2) == 0) {
2058 cmn_err(CE_PANIC, "rnodes with same fs, fh "
2059 "(%p, %p)", (void *)checkrp, (void *)rp);
2060 }
2061 }
2062
2063 if (&rtable4[index] != checkrp->r_hashq)
2064 rw_exit(&rtable4[index].r_lock);
2065 }
2066 }
2067
2068 #endif /* DEBUG */
2069