xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_cnode.c (revision 2b4a78020b9c38d1b95e2f3fefa6d6e4be382d1f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 
29 #include <sys/param.h>
30 #include <sys/types.h>
31 #include <sys/systm.h>
32 #include <sys/cred.h>
33 #include <sys/proc.h>
34 #include <sys/user.h>
35 #include <sys/vfs.h>
36 #include <sys/vnode.h>
37 #include <sys/pathname.h>
38 #include <sys/uio.h>
39 #include <sys/tiuser.h>
40 #include <sys/sysmacros.h>
41 #include <sys/kmem.h>
42 #include <sys/buf.h>
43 #include <netinet/in.h>
44 #include <rpc/types.h>
45 #include <rpc/xdr.h>
46 #include <rpc/auth.h>
47 #include <rpc/clnt.h>
48 #include <sys/mount.h>
49 #include <sys/ioctl.h>
50 #include <sys/statvfs.h>
51 #include <sys/errno.h>
52 #include <sys/debug.h>
53 #include <sys/cmn_err.h>
54 #include <sys/utsname.h>
55 #include <sys/modctl.h>
56 #include <vm/pvn.h>
57 
58 #include <sys/fs/cachefs_fs.h>
59 
/*
 * cachefs_max_idle is a tunable global.
 * It decides when cachefs_cnode_idleclean() is run.
 * The default value is CFS_FS_MAXIDLE.
 * When the tunable is set to X, a cleanup is triggered once the
 * number of idle cnodes on a file system exceeds X, and the cleanup
 * removes roughly (.25 * X) idle cnodes.
 */
int cachefs_max_idle = CFS_FS_MAXIDLE;


/*
 * kmem cache used for cnode memory: cachefs_cnode_make() allocates
 * cnodes from it and cachefs_cnode_inactive() returns them to it.
 * It starts out NULL here; it is not created in this part of the file.
 */
struct kmem_cache *cachefs_cnode_cache = NULL;
73 
74 /*
75  * Functions for cnode management.
76  */
77 
78 /*
79  * Puts cnode on idle list.  Only call from an async thread or no
80  * locks held.
81  */
82 /*ARGSUSED1*/
83 void
84 cachefs_cnode_idle(struct vnode *vp, cred_t *cr)
85 {
86 	cnode_t *cp = VTOC(vp);
87 	fscache_t *fscp = C_TO_FSCACHE(cp);
88 	int cleanidle;
89 	vnode_t *unldvp;
90 	cred_t *unlcred;
91 	char *unlname;
92 	int error;
93 
94 	/*
95 	 * The key to this routine is not to drop the vnode count
96 	 * while on the idle list.  This prevents this routine from
97 	 * being called again by vn_rele on an inactive cnode.
98 	 * Nothing bad happens if an "active" cnode is put on the idle
99 	 * list.  It eventually gets pulled off.
100 	 * Also this routine is only called from a thread message sent
101 	 * by cachefs_inactive().  It is not safe for this routine
102 	 * to be the "inactive" entry point because of the dnlc.
103 	 */
104 
105 	for (;;) {
106 		/* get access to the file system */
107 		error = cachefs_cd_access(fscp, 0, 1);
108 		ASSERT(error == 0);
109 
110 		/* get exclusive access to this cnode */
111 		mutex_enter(&cp->c_statelock);
112 
113 		/* done with this loop if not unlinking a file */
114 		if (cp->c_unldvp == NULL)
115 			break;
116 
117 		/* get unlink info out of the cnode */
118 		unldvp = cp->c_unldvp;
119 		unlcred = cp->c_unlcred;
120 		unlname = cp->c_unlname;
121 		cp->c_unldvp = NULL;
122 		cp->c_unlcred = NULL;
123 		cp->c_unlname = NULL;
124 		mutex_exit(&cp->c_statelock);
125 
126 		/* finish the remove operation */
127 		if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
128 			error = cachefs_remove_connected(unldvp,
129 			    unlname, unlcred, vp);
130 		} else {
131 			error = cachefs_remove_disconnected(unldvp,
132 			    unlname, unlcred, vp);
133 		}
134 
135 		/* reacquire cnode lock */
136 		mutex_enter(&cp->c_statelock);
137 
138 		/* if a timeout occurred */
139 		if (CFS_TIMEOUT(fscp, error)) {
140 			/* restore cnode state */
141 			if (cp->c_unldvp == NULL) {
142 				cp->c_unldvp = unldvp;
143 				cp->c_unlcred = unlcred;
144 				cp->c_unlname = unlname;
145 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
146 					mutex_exit(&cp->c_statelock);
147 					cachefs_cd_release(fscp);
148 					cachefs_cd_timedout(fscp);
149 					continue;
150 				} else {
151 					cp->c_flags |= CN_PENDRM;
152 					mutex_exit(&cp->c_statelock);
153 					goto out;
154 				}
155 			}
156 		}
157 		/* free up resources */
158 		VN_RELE(unldvp);
159 		cachefs_kmem_free(unlname, MAXNAMELEN);
160 		crfree(unlcred);
161 		break;
162 	}
163 
164 	ASSERT((cp->c_flags & CN_IDLE) == 0);
165 	/*
166 	 * If we are going to destroy this cnode,
167 	 * do it now instead of later.
168 	 */
169 	if (cp->c_flags & (CN_DESTROY | CN_STALE)) {
170 		mutex_exit(&cp->c_statelock);
171 		(void) cachefs_cnode_inactive(vp, cr);
172 		goto out;
173 	}
174 
175 	/*
176 	 * mark cnode as idle, put it on the idle list, and increment the
177 	 * number of idle cnodes
178 	 */
179 	cp->c_flags |= CN_IDLE;
180 	mutex_enter(&fscp->fs_idlelock);
181 	cachefs_cnode_idleadd(cp);
182 	if ((fscp->fs_idlecnt > cachefs_max_idle) &&
183 	    (fscp->fs_idleclean == 0) &&
184 	    (fscp->fs_cdtransition == 0)) {
185 		fscp->fs_idleclean = 1;
186 		cleanidle = 1;
187 	} else {
188 		cleanidle = 0;
189 	}
190 	mutex_exit(&fscp->fs_idlelock);
191 
192 	/* release cnode */
193 	mutex_exit(&cp->c_statelock);
194 
195 	/* if should reduce the number of idle cnodes */
196 	if (cleanidle) {
197 		ASSERT(fscp->fs_idlecnt > 1);
198 		fscache_hold(fscp);
199 		cachefs_cnode_idleclean(fscp, 0);
200 		/* XXX race with cachefs_unmount() calling destroy */
201 		fscache_rele(fscp);
202 	}
203 
204 out:
205 	/* release hold on the file system */
206 	/* XXX unmount() could have called destroy after fscache_rele() */
207 	cachefs_cd_release(fscp);
208 }
209 
210 /*
211  * Removes cnodes from the idle list and destroys them.
212  */
213 void
214 cachefs_cnode_idleclean(fscache_t *fscp, int unmount)
215 {
216 	int remcnt;
217 	cnode_t *cp;
218 
219 	mutex_enter(&fscp->fs_idlelock);
220 
221 	/* determine number of cnodes to destroy */
222 	if (unmount) {
223 		/* destroy all plus any that go idle while in this routine */
224 		remcnt = fscp->fs_idlecnt * 2;
225 	} else {
226 		/* reduce to 75% of max allowed idle cnodes */
227 		remcnt = (fscp->fs_idlecnt - cachefs_max_idle) +
228 		    (cachefs_max_idle >> 2);
229 	}
230 
231 	for (; remcnt > 0; remcnt--) {
232 		/* get cnode on back of idle list and hold it */
233 		cp = fscp->fs_idleback;
234 		if (cp == NULL)
235 			break;
236 		VN_HOLD(CTOV(cp));
237 		mutex_exit(&fscp->fs_idlelock);
238 
239 		/* if the cnode is still on the idle list */
240 		mutex_enter(&cp->c_statelock);
241 		if (cp->c_flags & CN_IDLE) {
242 			cp->c_flags &= ~CN_IDLE;
243 
244 			/* remove cnode from the idle list */
245 			mutex_enter(&fscp->fs_idlelock);
246 			cachefs_cnode_idlerem(cp);
247 			mutex_exit(&fscp->fs_idlelock);
248 			mutex_exit(&cp->c_statelock);
249 
250 			/* destroy the cnode */
251 			VN_RELE(CTOV(cp));
252 			(void) cachefs_cnode_inactive(CTOV(cp), kcred);
253 		} else {
254 			/* cnode went active, just skip it */
255 			mutex_exit(&cp->c_statelock);
256 			VN_RELE(CTOV(cp));
257 		}
258 		mutex_enter(&fscp->fs_idlelock);
259 	}
260 
261 	fscp->fs_idleclean = 0;
262 	mutex_exit(&fscp->fs_idlelock);
263 }
264 
265 /*
266  * This routine does the real work of inactivating a cachefs vnode.
267  */
268 int
269 cachefs_cnode_inactive(register struct vnode *vp, cred_t *cr)
270 {
271 	cnode_t *cp;
272 	struct fscache *fscp;
273 	struct filegrp *fgp;
274 	cachefscache_t *cachep;
275 	struct cachefs_metadata *mdp;
276 	int meta_destroyed = 0;
277 
278 	cp = VTOC(vp);
279 
280 	fscp = C_TO_FSCACHE(cp);
281 	cachep = fscp->fs_cache;
282 	ASSERT(cachep != NULL);
283 	fgp = cp->c_filegrp;
284 
285 	ASSERT((cp->c_flags & CN_IDLE) == 0);
286 
287 	/* truncate the front file if necessary */
288 	mutex_enter(&cp->c_statelock);
289 	if ((cp->c_flags & CN_NOCACHE) && (cp->c_metadata.md_flags & MD_FILE) &&
290 	    cp->c_metadata.md_frontblks) {
291 
292 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
293 
294 #ifdef CFSDEBUG
295 		CFS_DEBUG(CFSDEBUG_INVALIDATE)
296 			printf("c_cnode_inactive: invalidating %llu\n",
297 			    (u_longlong_t)cp->c_id.cid_fileno);
298 #endif
299 		/*
300 		 * If the cnode is being populated, and we're not the
301 		 * populating thread, then block until the pop thread
302 		 * completes.  If we are the pop thread, then we may come in
303 		 * here, but not to nuke the directory cnode at a critical
304 		 * juncture.
305 		 */
306 		while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
307 		    (cp->c_popthrp != curthread))
308 			cv_wait(&cp->c_popcv, &cp->c_statelock);
309 
310 		cachefs_inval_object(cp);
311 	}
312 	mutex_exit(&cp->c_statelock);
313 
314 	for (;;) {
315 		/* see if vnode is really inactive */
316 		mutex_enter(&vp->v_lock);
317 		ASSERT(vp->v_count > 0);
318 		if (vp->v_count > 1) {
319 			/*
320 			 * It's impossible for us to be cnode_inactive for
321 			 * the root cnode _unless_ we are being called from
322 			 * cachefs_unmount (where inactive is called
323 			 * explictly).  If the count is not 1, there is
324 			 * still an outstanding reference to the root cnode,
325 			 * and we return EBUSY; this allows cachefs_unmount
326 			 * to fail.
327 			 */
328 			if (cp->c_flags & CN_ROOT) {
329 				mutex_exit(&vp->v_lock);
330 				return (EBUSY);
331 			}
332 			cp->c_ipending = 0;
333 			vp->v_count--;	/* release our hold from vn_rele */
334 			mutex_exit(&vp->v_lock);
335 			return (0);
336 		}
337 		mutex_exit(&vp->v_lock);
338 
339 		/* get rid of any pages, do not care if cannot be pushed */
340 		if (vn_has_cached_data(vp)) {
341 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
342 			(void) cachefs_putpage_common(vp, (offset_t)0, 0,
343 			    B_INVAL | B_FORCE, cr);
344 		}
345 
346 		/* if need to sync metadata, the call is a no op for NFSv4 */
347 		if ((cp->c_flags & (CN_UPDATED | CN_DESTROY)) == CN_UPDATED) {
348 			(void) cachefs_sync_metadata(cp);
349 			continue;
350 		}
351 		break;
352 	}
353 
354 	/*
355 	 * Lock out possible race with makecachefsnode.
356 	 * Makecachefsnode will fix up the rl/active list stuff to
357 	 * be correct when it gets to run.
358 	 * We have to do the rl/active stuff while the cnode is on the hash
359 	 * list to sync actions on the rl/active list.
360 	 */
361 	mutex_enter(&fgp->fg_cnodelock);
362 	mutex_enter(&cp->c_statelock);
363 
364 	/* see if vnode is still inactive */
365 	mutex_enter(&vp->v_lock);
366 	ASSERT(vp->v_count > 0);
367 	if (vp->v_count > 1) {
368 		cp->c_ipending = 0;
369 		vp->v_count--;
370 		mutex_exit(&vp->v_lock);
371 		mutex_exit(&cp->c_statelock);
372 		mutex_exit(&fgp->fg_cnodelock);
373 #ifdef CFSDEBUG
374 		CFS_DEBUG(CFSDEBUG_INVALIDATE)
375 			printf("cachefs_cnode_inactive: %u vp %p\n",
376 			    vp->v_count, vp);
377 #endif
378 		return (0);
379 	}
380 	mutex_exit(&vp->v_lock);
381 
382 	/* check for race with remove */
383 	if (cp->c_unldvp) {
384 		mutex_exit(&cp->c_statelock);
385 		mutex_exit(&fgp->fg_cnodelock);
386 
387 		/* this causes cachefs_inactive to be called again */
388 		VN_RELE(vp);
389 		return (0);
390 	}
391 
392 	/* if any pages left, really get rid of them */
393 	if (vn_has_cached_data(vp)) {
394 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
395 		(void) pvn_vplist_dirty(vp, 0, NULL, B_INVAL | B_TRUNC, cr);
396 	}
397 	ASSERT(vp->v_count == 1);
398 
399 	mdp = &cp->c_metadata;
400 
401 	/* if we can (and should) destroy the front file and metadata */
402 	if ((cp->c_flags & (CN_DESTROY | CN_STALE)) &&
403 	    (fgp->fg_flags & CFS_FG_WRITE) && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
404 		if (mdp->md_rlno) {
405 			cachefs_removefrontfile(mdp, &cp->c_id, fgp);
406 			cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
407 			    mdp->md_rlno, 0);
408 			mdp->md_rlno = 0;
409 			mdp->md_rltype = CACHEFS_RL_NONE;
410 		}
411 		if ((cp->c_flags & CN_ALLOC_PENDING) == 0) {
412 			(void) filegrp_destroy_metadata(fgp, &cp->c_id);
413 			meta_destroyed = 1;
414 		}
415 	}
416 
417 	/* else put the front file on the gc list */
418 	else if (mdp->md_rlno &&
419 	    (fgp->fg_flags & CFS_FG_WRITE) &&
420 	    (cp->c_metadata.md_rltype == CACHEFS_RL_ACTIVE)) {
421 #ifdef CFSDEBUG
422 		cachefs_rlent_verify(cachep, CACHEFS_RL_ACTIVE,
423 		    mdp->md_rlno);
424 #endif /* CFSDEBUG */
425 
426 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
427 		cachefs_rlent_moveto(cachep, CACHEFS_RL_GC, mdp->md_rlno,
428 		    mdp->md_frontblks);
429 		mdp->md_rltype = CACHEFS_RL_GC;
430 		cp->c_flags |= CN_UPDATED;
431 	}
432 
433 	/* if idlelist pointer(s) not null, remove from idle list */
434 	if ((cp->c_idlefront != NULL) || (cp->c_idleback != NULL)) {
435 		mutex_enter(&fscp->fs_idlelock);
436 		cachefs_cnode_idlerem(cp);
437 		mutex_exit(&fscp->fs_idlelock);
438 	}
439 
440 	/* remove from the filegrp list prior to releasing the cnode lock */
441 	cachefs_cnode_listrem(cp);
442 
443 	mutex_exit(&cp->c_statelock);
444 	if (! meta_destroyed)
445 		(void) cachefs_sync_metadata(cp);
446 
447 	mutex_exit(&fgp->fg_cnodelock);
448 
449 	if (cp->c_cred != NULL) {
450 		crfree(cp->c_cred);
451 		cp->c_cred = NULL;
452 	}
453 
454 	if (cp->c_frontvp)
455 		VN_RELE(cp->c_frontvp);
456 
457 	if (cp->c_backvp)
458 		VN_RELE(cp->c_backvp);
459 
460 	if (cp->c_acldirvp)
461 		VN_RELE(cp->c_acldirvp);
462 
463 	rw_destroy(&cp->c_rwlock);
464 	mutex_destroy(&cp->c_statelock);
465 	cv_destroy(&cp->c_popcv);
466 	mutex_destroy(&cp->c_iomutex);
467 	cv_destroy(&cp->c_iocv);
468 
469 	/* free up cnode memory */
470 	vn_invalid(cp->c_vnode);
471 	vn_free(cp->c_vnode);
472 	kmem_cache_free(cachefs_cnode_cache, cp);
473 
474 	filegrp_rele(fgp);
475 	(void) fscache_cnodecnt(fscp, -1);
476 	return (0);
477 }
478 
479 /*
480  * Add a cnode to the filegrp list.
481  */
482 void
483 cachefs_cnode_listadd(struct cnode *cp)
484 {
485 	filegrp_t *fgp = cp->c_filegrp;
486 
487 	ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
488 	ASSERT(cp->c_next == NULL);
489 
490 	cp->c_next = fgp->fg_cnodelist;
491 	fgp->fg_cnodelist = cp;
492 }
493 
494 /*
495  * Remove a cnode from the filegrp list.
496  */
497 void
498 cachefs_cnode_listrem(struct cnode *cp)
499 {
500 	filegrp_t *fgp = cp->c_filegrp;
501 	struct cnode **headpp;
502 
503 #ifdef CFSDEBUG
504 	int found = 0;
505 #endif
506 
507 	ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
508 	ASSERT(cp->c_idleback == NULL);
509 	ASSERT(cp->c_idlefront == NULL);
510 
511 	for (headpp = &fgp->fg_cnodelist;
512 		*headpp != NULL; headpp = &(*headpp)->c_next) {
513 		if (*headpp == cp) {
514 			*headpp = cp->c_next;
515 			cp->c_next = NULL;
516 #ifdef CFSDEBUG
517 			found++;
518 #endif
519 			break;
520 		}
521 	}
522 #ifdef CFSDEBUG
523 	ASSERT(found);
524 #endif
525 }
526 
527 /*
528  * Add a cnode to the front of the fscache idle list.
529  */
530 void
531 cachefs_cnode_idleadd(struct cnode *cp)
532 {
533 	fscache_t *fscp = C_TO_FSCACHE(cp);
534 
535 	ASSERT(MUTEX_HELD(&cp->c_statelock));
536 	ASSERT(MUTEX_HELD(&fscp->fs_idlelock));
537 
538 	/* put cnode on the front of the idle list */
539 	cp->c_idlefront = fscp->fs_idlefront;
540 	cp->c_idleback =  NULL;
541 
542 	if (fscp->fs_idlefront)
543 		fscp->fs_idlefront->c_idleback = cp;
544 	else {
545 		ASSERT(fscp->fs_idleback == NULL);
546 		fscp->fs_idleback = cp;
547 	}
548 	fscp->fs_idlefront = cp;
549 	fscp->fs_idlecnt++;
550 }
551 
552 /*
553  * Remove a cnode from the fscache idle list.
554  */
555 void
556 cachefs_cnode_idlerem(struct cnode *cp)
557 {
558 	fscache_t *fscp = C_TO_FSCACHE(cp);
559 
560 	ASSERT(MUTEX_HELD(&cp->c_statelock));
561 	ASSERT(MUTEX_HELD(&fscp->fs_idlelock));
562 
563 	if (cp->c_idlefront == NULL) {
564 		ASSERT(fscp->fs_idleback == cp);
565 		fscp->fs_idleback = cp->c_idleback;
566 		if (fscp->fs_idleback != NULL)
567 			fscp->fs_idleback->c_idlefront = NULL;
568 	} else {
569 		cp->c_idlefront->c_idleback = cp->c_idleback;
570 	}
571 
572 	if (cp->c_idleback == NULL) {
573 		ASSERT(fscp->fs_idlefront == cp);
574 		fscp->fs_idlefront = cp->c_idlefront;
575 		if (fscp->fs_idlefront != NULL)
576 			fscp->fs_idlefront->c_idleback = NULL;
577 	} else {
578 		cp->c_idleback->c_idlefront = cp->c_idlefront;
579 		cp->c_idleback = NULL;
580 	}
581 	cp->c_idlefront = NULL;
582 	fscp->fs_idlecnt--;
583 	ASSERT(fscp->fs_idlecnt >= 0);
584 }
585 
586 /*
587  * Search the cnode list of the input file group, looking for a cnode which
588  * matches the supplied file ident fileno.
589  *
590  * Returns:
591  *	*cpp = NULL, if no valid matching cnode is found
592  *	*cpp = address of cnode with matching fileno, with c_statelock held
593  *	return status is 0 if no cnode found, or if found & cookies match
594  *	return status is 1 if a cnode was found, but the cookies don't match
595  *
596  * Note:  must grab the c_statelock for each cnode, or its state could
597  * change while we're processing it.  Also, if a cnode is found, must return
598  * with c_statelock still held, so that the cnode state cannot change until
599  * the calling routine releases the lock.
600  */
601 int
602 cachefs_cnode_find(filegrp_t *fgp, cfs_cid_t *cidp, fid_t *cookiep,
603     struct cnode **cpp, struct vnode *backvp, vattr_t *vap)
604 {
605 	struct cnode *cp;
606 	int badcookie = 0;
607 	uint32_t is_nfsv4;
608 
609 #ifdef CFSDEBUG
610 	CFS_DEBUG(CFSDEBUG_CNODE)
611 		cmn_err(CE_NOTE, "cachefs_cnode_find: fileno %llu fgp %p\n",
612 		    (u_longlong_t)cidp->cid_fileno, (void *)fgp);
613 #endif
614 	ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
615 
616 	*cpp = NULL;
617 	is_nfsv4 = CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp);
618 
619 	/*
620 	 * Cookie should be filled unless disconnected operation or
621 	 * backfilesystem is NFSv4
622 	 */
623 	if (cookiep == NULL && !CFS_ISFS_SNR(fgp->fg_fscp) &&
624 	    !CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp)) {
625 		goto out;
626 	}
627 
628 	for (cp = fgp->fg_cnodelist; cp != NULL; cp = cp->c_next) {
629 		mutex_enter(&cp->c_statelock);
630 
631 		if ((cidp->cid_fileno != cp->c_id.cid_fileno &&
632 			(is_nfsv4 == FALSE || cp->c_backvp != backvp)) ||
633 		    (cp->c_flags & (CN_STALE | CN_DESTROY))) {
634 			mutex_exit(&cp->c_statelock);
635 			continue;
636 		}
637 
638 		/*
639 		 * Having found a non stale, non destroy pending cnode with
640 		 * matching fileno, will be exiting the for loop, after
641 		 * determining return status
642 		 */
643 		*cpp = cp;
644 
645 		if ((cookiep != NULL) &&
646 		    ((cookiep->fid_len != cp->c_cookie.fid_len) ||
647 		    (bcmp((caddr_t)cookiep->fid_data,
648 		    (caddr_t)&cp->c_cookie.fid_data, cookiep->fid_len)) != 0)) {
649 #ifdef CFSDEBUG
650 			CFS_DEBUG(CFSDEBUG_GENERAL) {
651 				cmn_err(CE_NOTE,
652 				    "cachefs: dup fileno %llu, cp %p\n",
653 				    (u_longlong_t)cidp->cid_fileno, (void *)cp);
654 			}
655 #endif
656 			badcookie = 1;
657 		}
658 
659 		/*
660 		 * For NFSv4 since there is no fid, add a check to
661 		 * ensure the backvp and vap matches that in the cnode.
662 		 * If it doesn't then someone tried to use a stale cnode.
663 		 */
664 		if (is_nfsv4) {
665 			if (backvp && backvp != cp->c_backvp ||
666 			    vap && vap->va_type != cp->c_attr.va_type ||
667 			    cidp->cid_fileno != cp->c_id.cid_fileno) {
668 				CFS_DPRINT_BACKFS_NFSV4(C_TO_FSCACHE(cp),
669 				("cachefs_cnode_find (nfsv4): stale cnode "
670 				"cnode %p, backvp %p, new-backvp %p, vap %p "
671 				"fileno=%llx cp-fileno=%llx\n",
672 				cp, cp->c_backvp, backvp, vap,
673 				cidp->cid_fileno, cp->c_id.cid_fileno));
674 				badcookie = 1;
675 			}
676 		}
677 		break;
678 	}
679 out:
680 
681 #ifdef CFSDEBUG
682 	CFS_DEBUG(CFSDEBUG_CNODE)
683 		cmn_err(CE_NOTE, "cachefs_cnode_find: cp %p\n", (void *)*cpp);
684 #endif
685 	return (badcookie);
686 }
687 
688 /*
689  * We have to initialize the cnode contents. Fill in the contents from the
690  * cache (attrcache file), from the info passed in, whatever it takes.
691  */
692 static int
693 cachefs_cnode_init(cfs_cid_t *cidp, cnode_t *cp, fscache_t *fscp,
694     filegrp_t *fgp, fid_t *cookiep, vattr_t *vap, vnode_t *backvp,
695     int flag, cred_t *cr)
696 {
697 	int error = 0;
698 	int slotfound;
699 	vnode_t *vp;
700 	int null_cookie;
701 	cachefscache_t *cachep = fscp->fs_cache;
702 
703 	bzero(cp, sizeof (cnode_t));
704 	cp->c_vnode = vn_alloc(KM_SLEEP);
705 
706 	vp = CTOV(cp);
707 
708 	vp->v_data = (caddr_t)cp;
709 
710 	rw_init(&cp->c_rwlock, NULL, RW_DEFAULT, NULL);
711 	mutex_init(&cp->c_statelock, NULL, MUTEX_DEFAULT, NULL);
712 	cv_init(&cp->c_popcv, NULL, CV_DEFAULT, NULL);
713 	mutex_init(&cp->c_iomutex, NULL, MUTEX_DEFAULT, NULL);
714 	cv_init(&cp->c_iocv, NULL, CV_DEFAULT, NULL);
715 
716 	vn_setops(vp, cachefs_getvnodeops());
717 	cp->c_id = *cidp;
718 	if (backvp != NULL) {
719 		cp->c_backvp = backvp;
720 		VN_HOLD(backvp);
721 	}
722 	cp->c_flags |= flag;
723 	filegrp_hold(fgp);
724 	cp->c_filegrp = fgp;
725 	if (cookiep)
726 		cp->c_cookie = *cookiep;
727 	mutex_enter(&cp->c_statelock);
728 
729 	/*
730 	 * if nocache is set then ignore anything cached for this file,
731 	 * if nfsv4 flag is set, then create the cnode but don't do
732 	 * any caching.
733 	 */
734 	if (cp->c_flags & CN_NOCACHE || CFS_ISFS_BACKFS_NFSV4(fscp)) {
735 		/*
736 		 * this case only happens while booting without a cache
737 		 * or if NFSv4 is the backfilesystem
738 		 */
739 		ASSERT(!CFS_ISFS_SNR(fscp));
740 		ASSERT(fscp->fs_cdconnected == CFS_CD_CONNECTED);
741 		if (cookiep || CFS_ISFS_BACKFS_NFSV4(fscp)) {
742 			error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
743 			if (error)
744 				goto out;
745 			cp->c_flags |= CN_UPDATED | CN_ALLOC_PENDING;
746 			ASSERT(cp->c_attr.va_type != 0);
747 			VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
748 			    cp->c_attr.va_type, cp->c_attr.va_rdev);
749 			cachefs_cnode_setlocalstats(cp);
750 		} else
751 			error = ESTALE;
752 		goto out;
753 	}
754 
755 	/*
756 	 * see if there's a slot for this filegrp/cid fileno
757 	 * if not, and there's no cookie info, nothing can be done, but if
758 	 * there's cookie data indicate we need to create a metadata slot.
759 	 */
760 	slotfound = cachefs_cid_inuse(cp->c_filegrp, cidp);
761 	if (slotfound == 0) {
762 		if (cookiep == NULL) {
763 			error = ENOENT;
764 			goto out;
765 		}
766 		cp->c_flags |= CN_ALLOC_PENDING;
767 	} else {
768 		/*
769 		 * if a slot was found, then increment the slot in use count
770 		 * and try to read the metadata.
771 		 */
772 		cp->c_filegrp->fg_header->ach_count++;
773 		error = filegrp_read_metadata(cp->c_filegrp, cidp,
774 		    &cp->c_metadata);
775 	}
776 	/*
777 	 * if there wasn't a slot, or an attempt to read it results in ENOENT,
778 	 * then init the cache object, create the vnode, etc...
779 	 */
780 	if ((slotfound == 0) || (error == ENOENT)) {
781 		error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
782 		if (error)
783 			goto out;
784 		ASSERT(cp->c_attr.va_type != 0);
785 		VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
786 		    cp->c_attr.va_type, cp->c_attr.va_rdev);
787 		cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
788 	} else if (error == 0) {
789 		/* slot found, no error occurred on the metadata read */
790 		cp->c_size = cp->c_attr.va_size;
791 
792 		if ((cachep->c_flags & CACHE_CHECK_RLTYPE) &&
793 		    (cp->c_metadata.md_rlno != 0) &&
794 		    (cp->c_metadata.md_rltype == CACHEFS_RL_ACTIVE)) {
795 			rl_entry_t rl, *rlp;
796 
797 			mutex_enter(&cachep->c_contentslock);
798 			error = cachefs_rl_entry_get(cachep,
799 			    cp->c_metadata.md_rlno, &rlp);
800 			if (error) {
801 				mutex_exit(&cachep->c_contentslock);
802 				goto out;
803 			}
804 			rl = *rlp;
805 			mutex_exit(&cachep->c_contentslock);
806 			if (cp->c_metadata.md_rltype != rl.rl_current) {
807 				cp->c_flags |= CN_UPDATED;
808 				cp->c_metadata.md_rltype = rl.rl_current;
809 			}
810 		}
811 
812 		/*
813 		 * If no cookie is specified, or if this is a local file,
814 		 * accept the one in the metadata.
815 		 */
816 		null_cookie = 0;
817 		if ((cookiep == NULL) || (cp->c_id.cid_flags & CFS_CID_LOCAL)) {
818 			cookiep = &cp->c_metadata.md_cookie;
819 			null_cookie = 1;
820 		}
821 
822 		/* if cookies do not match, reset the metadata */
823 		if ((cookiep->fid_len != cp->c_cookie.fid_len) ||
824 		    (bcmp(&cookiep->fid_data, &cp->c_cookie.fid_data,
825 			(size_t)cookiep->fid_len) != 0)) {
826 			cp->c_cookie = *cookiep;
827 			cp->c_flags |= CN_UPDATED;
828 			cp->c_metadata.md_timestamp.tv_sec = 0;
829 			/* clear all but the front file bit */
830 			cp->c_metadata.md_flags &= MD_FILE;
831 			error = CFSOP_INIT_COBJECT(fscp, cp, vap, cr);
832 			ASSERT(cp->c_attr.va_type != 0);
833 			VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
834 			    cp->c_attr.va_type, cp->c_attr.va_rdev);
835 		}
836 
837 		/* else if the consistency type changed, fix it up */
838 		else if (cp->c_metadata.md_consttype != fscp->fs_consttype) {
839 			ASSERT(cp->c_attr.va_type != 0);
840 			VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
841 			    cp->c_attr.va_type, cp->c_attr.va_rdev);
842 			CFSOP_CONVERT_COBJECT(fscp, cp, cr);
843 			if (!null_cookie) {
844 				error = CFSOP_CHECK_COBJECT(fscp, cp,
845 				    C_BACK_CHECK, cr);
846 			}
847 		}
848 
849 		/* else check the consistency of the data */
850 		else {
851 			ASSERT(cp->c_attr.va_type != 0);
852 			VN_SET_VFS_TYPE_DEV(vp, fscp->fs_cfsvfsp,
853 			    cp->c_attr.va_type, cp->c_attr.va_rdev);
854 			if (!null_cookie) {
855 				error = CFSOP_CHECK_COBJECT(fscp, cp, 0, cr);
856 			}
857 		}
858 	} else {
859 		goto out;
860 	}
861 	cachefs_cnode_setlocalstats(cp);
862 
863 out:
864 	mutex_exit(&cp->c_statelock);
865 	if (error) {
866 		if (cp->c_frontvp)
867 			VN_RELE(cp->c_frontvp);
868 		if (cp->c_backvp)
869 			VN_RELE(cp->c_backvp);
870 		if (cp->c_acldirvp)
871 			VN_RELE(cp->c_acldirvp);
872 		filegrp_rele(fgp);
873 		rw_destroy(&cp->c_rwlock);
874 		mutex_destroy(&cp->c_statelock);
875 		cv_destroy(&cp->c_popcv);
876 		mutex_destroy(&cp->c_iomutex);
877 		cv_destroy(&cp->c_iocv);
878 	}
879 	return (error);
880 }
881 
882 /*
883  * Finds the cnode for the specified fileno and fid.
884  * Creates the cnode if it does not exist.
885  * The cnode is returned held.
886  */
887 int
888 cachefs_cnode_make(cfs_cid_t *cidp, fscache_t *fscp, fid_t *cookiep,
889 	vattr_t *vap, vnode_t *backvp, cred_t *cr, int flag, cnode_t **cpp)
890 {
891 	struct cnode *cp;
892 	int error;
893 	struct filegrp *fgp;
894 	struct cachefs_metadata *mdp;
895 	fid_t cookie;
896 
897 #ifdef CFSDEBUG
898 	CFS_DEBUG(CFSDEBUG_CNODE)
899 		printf("cachefs_cnode_make: ENTER fileno %llu\n",
900 		    (u_longlong_t)cidp->cid_fileno);
901 #endif
902 
903 	/* get the file group that owns this file */
904 	mutex_enter(&fscp->fs_fslock);
905 	fgp = filegrp_list_find(fscp, cidp);
906 	if (fgp == NULL) {
907 		fgp = filegrp_create(fscp, cidp);
908 		filegrp_list_add(fscp, fgp);
909 	}
910 	filegrp_hold(fgp);
911 	mutex_exit(&fscp->fs_fslock);
912 
913 	/* grab the cnode list lock */
914 	mutex_enter(&fgp->fg_cnodelock);
915 
916 	if ((fgp->fg_flags & CFS_FG_READ) == 0)
917 		flag |= CN_NOCACHE;
918 
919 	error = 0;
920 	cp = NULL;
921 
922 	/* look for the cnode on the cnode list */
923 	error = cachefs_cnode_find(fgp, cidp, cookiep, &cp, backvp, vap);
924 
925 	/*
926 	 * If there already is a cnode with this cid but a different cookie,
927 	 * (or backvp) we're not going to be using the one we found.
928 	 */
929 	if (error && CFS_ISFS_BACKFS_NFSV4(fscp)) {
930 		ASSERT(MUTEX_HELD(&cp->c_statelock));
931 		cachefs_cnode_stale(cp);
932 		mutex_exit(&cp->c_statelock);
933 		cp = NULL;
934 		error = 0;
935 	} else if (error) {
936 		ASSERT(cp);
937 		ASSERT(cookiep);
938 
939 		mutex_exit(&cp->c_statelock);
940 
941 		/*
942 		 * If backvp is NULL then someone tried to use
943 		 * a stale cookie.
944 		 */
945 		if (backvp == NULL) {
946 			mutex_exit(&fgp->fg_cnodelock);
947 			error = ESTALE;
948 			goto out;
949 		}
950 
951 		/* verify the backvp */
952 		error = cachefs_getcookie(backvp, &cookie, NULL, cr, TRUE);
953 		if (error ||
954 		    ((cookiep->fid_len != cookie.fid_len) ||
955 		    (bcmp(&cookiep->fid_data, cookie.fid_data,
956 			(size_t)cookiep->fid_len) != 0))) {
957 			mutex_exit(&fgp->fg_cnodelock);
958 			error = ESTALE;
959 			goto out;
960 		}
961 
962 		/* make the old cnode give up its front file resources */
963 		VN_HOLD(CTOV(cp));
964 		(void) cachefs_sync_metadata(cp);
965 		mutex_enter(&cp->c_statelock);
966 		mdp = &cp->c_metadata;
967 		if (mdp->md_rlno) {
968 			/* XXX sam: should this assert be NOCACHE? */
969 			/* XXX sam: maybe we should handle NOFILL as no-op */
970 			ASSERT((fscp->fs_cache->c_flags & CACHE_NOFILL) == 0);
971 
972 			/* if modified in the cache, move to lost+found */
973 			if ((cp->c_attr.va_type == VREG) &&
974 			    (cp->c_metadata.md_rltype == CACHEFS_RL_MODIFIED)) {
975 				error = cachefs_cnode_lostfound(cp, NULL);
976 				if (error) {
977 					mutex_exit(&cp->c_statelock);
978 					VN_RELE(CTOV(cp));
979 					mutex_exit(&fgp->fg_cnodelock);
980 					error = ESTALE;
981 					goto out;
982 				}
983 			}
984 
985 			/* else nuke the front file */
986 			else {
987 				cachefs_cnode_stale(cp);
988 			}
989 		} else {
990 			cachefs_cnode_stale(cp);
991 		}
992 		mutex_exit(&cp->c_statelock);
993 		VN_RELE(CTOV(cp));
994 		cp = NULL;
995 		error = 0;
996 	}
997 
998 
999 	/* if the cnode does not exist */
1000 	if (cp == NULL) {
1001 		/* XXX should we drop all locks for this? */
1002 		cp = kmem_cache_alloc(cachefs_cnode_cache, KM_SLEEP);
1003 
1004 		error = cachefs_cnode_init(cidp, cp, fscp, fgp,
1005 		    cookiep, vap, backvp, flag, cr);
1006 		if (error) {
1007 			mutex_exit(&fgp->fg_cnodelock);
1008 			vn_free(cp->c_vnode);
1009 			kmem_cache_free(cachefs_cnode_cache, cp);
1010 			goto out;
1011 		}
1012 
1013 		if (cp->c_metadata.md_rlno &&
1014 		    (cp->c_metadata.md_rltype == CACHEFS_RL_GC) &&
1015 		    ((fscp->fs_cache->c_flags & CACHE_NOFILL) == 0)) {
1016 #ifdef CFSDEBUG
1017 			cachefs_rlent_verify(fscp->fs_cache,
1018 			    CACHEFS_RL_GC, cp->c_metadata.md_rlno);
1019 #endif /* CFSDEBUG */
1020 			cachefs_rlent_moveto(fscp->fs_cache,
1021 			    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
1022 			    cp->c_metadata.md_frontblks);
1023 			cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
1024 			cp->c_flags |= CN_UPDATED;
1025 		}
1026 
1027 		cachefs_cnode_listadd(cp);
1028 		vn_exists(cp->c_vnode);
1029 		mutex_exit(&fgp->fg_cnodelock);
1030 		(void) fscache_cnodecnt(fscp, 1);
1031 	}
1032 
1033 	/* else if the cnode exists */
1034 	else {
1035 		VN_HOLD(CTOV(cp));
1036 
1037 		/* remove from idle list if on it */
1038 		if (cp->c_flags & CN_IDLE) {
1039 			cp->c_flags &= ~CN_IDLE;
1040 
1041 			mutex_enter(&fscp->fs_idlelock);
1042 			cachefs_cnode_idlerem(cp);
1043 			mutex_exit(&fscp->fs_idlelock);
1044 			VN_RELE(CTOV(cp));
1045 			cp->c_ipending = 0;
1046 		}
1047 		mutex_exit(&cp->c_statelock);
1048 		mutex_exit(&fgp->fg_cnodelock);
1049 	}
1050 
1051 	/*
1052 	 * Assertion to ensure the cnode matches
1053 	 * the backvp and attribute type information.
1054 	 */
1055 	ASSERT((CFS_ISFS_BACKFS_NFSV4(fscp) == 0) ||
1056 		((cp->c_backvp == backvp) &&
1057 		(cp->c_attr.va_type == vap->va_type)));
1058 out:
1059 	*cpp = ((error == 0) ? cp : NULL);
1060 	filegrp_rele(fgp);
1061 
1062 #ifdef CFSDEBUG
1063 	CFS_DEBUG(CFSDEBUG_CNODE)
1064 		printf("cachefs_cnode_make: EXIT cp %p, error %d\n",
1065 		    (void *)*cpp, error);
1066 #endif
1067 	return (error);
1068 }
1069 
1070 /*
1071  * cachefs_cid_inuse()
1072  *
1073  * returns nonzero if a cid has any data in the cache; either a cnode
1074  * or metadata.
1075  */
1076 
1077 int
1078 cachefs_cid_inuse(filegrp_t *fgp, cfs_cid_t *cidp)
1079 {
1080 	cnode_t *cp;
1081 	int status = 0;
1082 
1083 	ASSERT(MUTEX_HELD(&fgp->fg_cnodelock));
1084 
1085 	/*
1086 	 * Since we don't care about the cookie data, we don't care about any
1087 	 * status that find might return.
1088 	 */
1089 
1090 	cp = NULL;
1091 	(void) cachefs_cnode_find(fgp, cidp, NULL, &cp, NULL, NULL);
1092 	if (cp != NULL) {
1093 		mutex_exit(&cp->c_statelock);
1094 		status = 1;
1095 		return (status);
1096 	}
1097 
1098 	/*
1099 	 * Don't want to use filegrp_read_metadata, since it will return
1100 	 * ENOENT if the metadata slot exists but hasn't been written to yet.
1101 	 * That condition still counts as the slot (metadata) being in use.
1102 	 * Instead, as long as the filegrp attrcache has been created and
1103 	 * there's a slot assigned for this cid, then the metadata is in use.
1104 	 */
1105 	if (((fgp->fg_flags & CFS_FG_ALLOC_ATTR) == 0) &&
1106 	    (filegrp_cid_to_slot(fgp, cidp) != 0))
1107 		status = 1;
1108 
1109 	return (status);
1110 }
1111 
1112 /*
1113  * cachefs_fileno_inuse()
1114  *
1115  * returns nonzero if a fileno is known to the cache, as either a
1116  * local or a normal file.
1117  */
1118 
1119 int
1120 cachefs_fileno_inuse(fscache_t *fscp, ino64_t fileno)
1121 {
1122 	cfs_cid_t cid;
1123 	filegrp_t *fgp;
1124 	int known = 0;
1125 
1126 	ASSERT(MUTEX_HELD(&fscp->fs_fslock));
1127 	cid.cid_fileno = fileno;
1128 
1129 	/* if there's no filegrp for this cid range, then there's no data */
1130 	fgp = filegrp_list_find(fscp, &cid);
1131 	if (fgp == NULL)
1132 		return (known);
1133 
1134 	filegrp_hold(fgp);
1135 	mutex_enter(&fgp->fg_cnodelock);
1136 
1137 	cid.cid_flags = CFS_CID_LOCAL;
1138 	if (cachefs_cid_inuse(fgp, &cid)) {
1139 		known = 1;
1140 		goto out;
1141 	}
1142 	cid.cid_flags = 0;
1143 	if (cachefs_cid_inuse(fgp, &cid))
1144 		known = 1;
1145 out:
1146 	mutex_exit(&fgp->fg_cnodelock);
1147 	filegrp_rele(fgp);
1148 	return (known);
1149 }
1150 
1151 /*
1152  * Creates a cnode from an unused inode in the cache.
1153  * The cnode is returned held.
1154  */
1155 int
1156 cachefs_cnode_create(fscache_t *fscp, vattr_t *vap, int flag, cnode_t **cpp)
1157 {
1158 	struct cnode *cp;
1159 	int error, found;
1160 	struct filegrp *fgp;
1161 	cfs_cid_t cid, cid2;
1162 
1163 	ASSERT(CFS_ISFS_SNR(fscp));
1164 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1165 
1166 	cid.cid_flags = CFS_CID_LOCAL;
1167 	cid2.cid_flags = 0;
1168 
1169 	/* find an unused local file in the cache */
1170 	for (;;) {
1171 		mutex_enter(&fscp->fs_fslock);
1172 
1173 		/* make sure we did not wrap */
1174 		fscp->fs_info.fi_localfileno++;
1175 		if (fscp->fs_info.fi_localfileno == 0)
1176 			fscp->fs_info.fi_localfileno = 3;
1177 		cid.cid_fileno = fscp->fs_info.fi_localfileno;
1178 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
1179 
1180 		/* avoid fileno conflict in non-local space */
1181 		cid2.cid_fileno = cid.cid_fileno;
1182 		fgp = filegrp_list_find(fscp, &cid2);
1183 		if (fgp != NULL) {
1184 			filegrp_hold(fgp);
1185 			mutex_enter(&fgp->fg_cnodelock);
1186 			found = cachefs_cid_inuse(fgp, &cid2);
1187 			mutex_exit(&fgp->fg_cnodelock);
1188 			filegrp_rele(fgp);
1189 			if (found) {
1190 				mutex_exit(&fscp->fs_fslock);
1191 				continue;
1192 			}
1193 		}
1194 
1195 		/* get the file group that owns this fileno */
1196 		fgp = filegrp_list_find(fscp, &cid);
1197 		if (fgp == NULL) {
1198 			fgp = filegrp_create(fscp, &cid);
1199 			filegrp_list_add(fscp, fgp);
1200 		}
1201 
1202 		/* see if there is any room left in this file group */
1203 		mutex_enter(&fgp->fg_mutex);
1204 		if (fgp->fg_header &&
1205 		    (fgp->fg_header->ach_count ==
1206 		    fscp->fs_info.fi_fgsize)) {
1207 			/* no more room, set up for the next file group */
1208 			fscp->fs_info.fi_localfileno = fgp->fg_id.cid_fileno +
1209 			    fscp->fs_info.fi_fgsize;
1210 			mutex_exit(&fgp->fg_mutex);
1211 			mutex_exit(&fscp->fs_fslock);
1212 			continue;
1213 		}
1214 		mutex_exit(&fgp->fg_mutex);
1215 
1216 		filegrp_hold(fgp);
1217 		mutex_exit(&fscp->fs_fslock);
1218 
1219 		ASSERT((fgp->fg_flags &
1220 		    (CFS_FG_READ | CFS_FG_WRITE)) ==
1221 		    (CFS_FG_READ | CFS_FG_WRITE));
1222 
1223 		/* grab the cnode list lock */
1224 		mutex_enter(&fgp->fg_cnodelock);
1225 
1226 		if ((fgp->fg_flags & CFS_FG_READ) == 0)
1227 			flag |= CN_NOCACHE;
1228 
1229 		/* keep looking if a cnode or metadata exist for this fileno */
1230 		if (cachefs_cid_inuse(fgp, &cid)) {
1231 			mutex_exit(&fgp->fg_cnodelock);
1232 			filegrp_rele(fgp);
1233 #ifdef CFSDEBUG
1234 			CFS_DEBUG(CFSDEBUG_CNODE)
1235 				cmn_err(CE_NOTE, "cachefs_cnode_create: "
1236 				    "fileno %llu exists.\n",
1237 				    (u_longlong_t)cid.cid_fileno);
1238 #endif
1239 			continue;
1240 		}
1241 		break;
1242 	}
1243 
1244 	vap->va_nodeid = cid.cid_fileno;
1245 
1246 	/* create space for the cnode */
1247 	cp = kmem_cache_alloc(cachefs_cnode_cache, KM_SLEEP);
1248 
1249 	/* set up the cnode */
1250 	error = cachefs_cnode_init(&cid, cp, fscp, fgp,
1251 	    &cp->c_cookie, vap, NULL, flag, kcred);
1252 	if (error) {
1253 		mutex_exit(&fgp->fg_cnodelock);
1254 		vn_free(cp->c_vnode);
1255 		kmem_cache_free(cachefs_cnode_cache, cp);
1256 		goto out;
1257 	}
1258 
1259 	/* save copy of fileno that is returned to the user */
1260 	cp->c_metadata.md_flags |= MD_LOCALFILENO;
1261 	cp->c_metadata.md_localfileno = cid.cid_fileno;
1262 	cp->c_flags |= CN_UPDATED;
1263 
1264 	cachefs_cnode_listadd(cp);
1265 	mutex_exit(&fgp->fg_cnodelock);
1266 	(void) fscache_cnodecnt(fscp, 1);
1267 
1268 out:
1269 	*cpp = ((error == 0) ? cp : NULL);
1270 	filegrp_rele(fgp);
1271 	return (error);
1272 }
1273 
1274 /*
1275  * Moves the cnode to its new location in the cache.
1276  * Before calling this routine other steps must be taken
1277  * to ensure that other file system routines that operate
1278  * on cnodes do not run.
1279  */
1280 void
1281 cachefs_cnode_move(cnode_t *cp)
1282 {
1283 	fscache_t *fscp = C_TO_FSCACHE(cp);
1284 	cfs_cid_t cid;
1285 	filegrp_t *fgp;
1286 	filegrp_t *ofgp = cp->c_filegrp;
1287 	struct cachefs_metadata *mdp;
1288 	cnode_t *xcp;
1289 	char oname[CFS_FRONTFILE_NAME_SIZE];
1290 	char nname[CFS_FRONTFILE_NAME_SIZE];
1291 	int ffnuke = 0;
1292 	int error;
1293 
1294 	ASSERT(CFS_ISFS_SNR(fscp));
1295 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1296 	ASSERT(cp->c_id.cid_flags & CFS_CID_LOCAL);
1297 	ASSERT(cp->c_attr.va_nodeid != 0);
1298 
1299 	/* construct the cid of the new file location */
1300 	cid.cid_fileno = cp->c_attr.va_nodeid;
1301 	cid.cid_flags = 0;
1302 
1303 	/* see if there already is a file occupying our slot */
1304 	error = cachefs_cnode_make(&cid, fscp, NULL, NULL, NULL, kcred,
1305 	    0, &xcp);
1306 	if (error == 0) {
1307 		mutex_enter(&xcp->c_statelock);
1308 		cachefs_cnode_stale(xcp);
1309 		mutex_exit(&xcp->c_statelock);
1310 		VN_RELE(CTOV(xcp));
1311 		xcp = NULL;
1312 		error = 0;
1313 	}
1314 
1315 	/* get the file group that this file is moving to */
1316 	mutex_enter(&fscp->fs_fslock);
1317 	fgp = filegrp_list_find(fscp, &cid);
1318 	if (fgp == NULL) {
1319 		fgp = filegrp_create(fscp, &cid);
1320 		filegrp_list_add(fscp, fgp);
1321 	}
1322 	filegrp_hold(fgp);
1323 	mutex_exit(&fscp->fs_fslock);
1324 
1325 	/* XXX fix to not have to create metadata to hold rl slot */
1326 	/* get a metadata slot in the new file group */
1327 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
1328 		(void) filegrp_allocattr(fgp);
1329 	}
1330 	/* XXX can fix create_metadata to call allocattr if necessary? */
1331 	error = filegrp_create_metadata(fgp, &cp->c_metadata, &cid);
1332 	if (error)
1333 		ffnuke = 1;
1334 	if ((ffnuke == 0) && filegrp_ffhold(fgp))
1335 		ffnuke = 1;
1336 
1337 	/* move the front file to the new file group */
1338 	if ((ffnuke == 0) && (cp->c_metadata.md_flags & MD_FILE)) {
1339 		make_ascii_name(&cp->c_id, oname);
1340 		make_ascii_name(&cid, nname);
1341 		error = VOP_RENAME(ofgp->fg_dirvp, oname, fgp->fg_dirvp,
1342 			nname, kcred, NULL, 0);
1343 		if (error) {
1344 			ffnuke = 1;
1345 #ifdef CFSDEBUG
1346 			if (error != ENOSPC) {
1347 				CFS_DEBUG(CFSDEBUG_CNODE)
1348 					printf("cachefs: cnode_move "
1349 					    "1: error %d\n", error);
1350 			}
1351 #endif
1352 		}
1353 	}
1354 
1355 	/* remove the file from the old file group */
1356 	mutex_enter(&ofgp->fg_cnodelock);
1357 	mutex_enter(&cp->c_statelock);
1358 	if (cp->c_frontvp) {
1359 		VN_RELE(cp->c_frontvp);
1360 		cp->c_frontvp = NULL;
1361 	}
1362 	if (cp->c_acldirvp) {
1363 		VN_RELE(cp->c_acldirvp);
1364 		cp->c_acldirvp = NULL;
1365 	}
1366 	mdp = &cp->c_metadata;
1367 	if (mdp->md_rlno) {
1368 		if (ffnuke) {
1369 			cachefs_removefrontfile(mdp, &cp->c_id, ofgp);
1370 			cachefs_rlent_moveto(fscp->fs_cache,
1371 			    CACHEFS_RL_FREE, mdp->md_rlno, 0);
1372 			mdp->md_rlno = 0;
1373 			mdp->md_rltype = CACHEFS_RL_NONE;
1374 		} else {
1375 			filegrp_ffrele(ofgp);
1376 		}
1377 	}
1378 	if (ffnuke)
1379 		mdp->md_flags &= ~MD_PACKED;
1380 	if ((cp->c_flags & CN_ALLOC_PENDING) == 0) {
1381 		(void) filegrp_destroy_metadata(ofgp, &cp->c_id);
1382 		cp->c_flags |= CN_ALLOC_PENDING;
1383 	}
1384 	cachefs_cnode_listrem(cp);
1385 	cp->c_filegrp = NULL;
1386 	mutex_exit(&cp->c_statelock);
1387 	mutex_exit(&ofgp->fg_cnodelock);
1388 
1389 	/* add the cnode to the new file group */
1390 	mutex_enter(&fgp->fg_cnodelock);
1391 	mutex_enter(&cp->c_statelock);
1392 	cp->c_id = cid;
1393 	cp->c_filegrp = fgp;
1394 	cp->c_flags |= CN_UPDATED;
1395 	mutex_exit(&cp->c_statelock);
1396 	cachefs_cnode_listadd(cp);
1397 	if (mdp->md_rlno)
1398 		cachefs_rl_changefileno(fscp->fs_cache, mdp->md_rlno,
1399 		    cp->c_id.cid_fileno);
1400 	mutex_exit(&fgp->fg_cnodelock);
1401 
1402 	filegrp_rele(ofgp);
1403 }
1404 
1405 /*
1406  * Syncs out the specified cnode.
1407  * Only called via cnode_traverse from fscache_sync
1408  */
1409 void
1410 cachefs_cnode_sync(cnode_t *cp)
1411 {
1412 	vnode_t *vp = CTOV(cp);
1413 	int error = 0;
1414 	fscache_t *fscp = C_TO_FSCACHE(cp);
1415 	int held = 0;
1416 
1417 	if (cp->c_flags & (CN_STALE | CN_DESTROY))
1418 		return;
1419 
1420 	if (fscp->fs_backvfsp && fscp->fs_backvfsp->vfs_flag & VFS_RDONLY)
1421 		return;
1422 
1423 	for (;;) {
1424 		/* get (or renew) access to the file system */
1425 		if (held) {
1426 			cachefs_cd_release(fscp);
1427 			held = 0;
1428 		}
1429 		/*
1430 		 * Getting file system access for reading is really cheating.
1431 		 * However we are getting called from sync so we do not
1432 		 * want to hang up if the cachefsd is not running.
1433 		 */
1434 		error = cachefs_cd_access(fscp, 0, 0);
1435 		if (error)
1436 			break;
1437 		held = 1;
1438 
1439 		/* if a regular file, write out the pages */
1440 		if ((vp->v_type == VREG) && vn_has_cached_data(vp)) {
1441 			ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1442 			error = cachefs_putpage_common(vp, (offset_t)0,
1443 			    0, 0, kcred);
1444 			if (CFS_TIMEOUT(fscp, error)) {
1445 				if (fscp->fs_cdconnected == CFS_CD_CONNECTED) {
1446 					cachefs_cd_release(fscp);
1447 					held = 0;
1448 					cachefs_cd_timedout(fscp);
1449 					continue;
1450 				} else {
1451 					/* cannot push, give up */
1452 					break;
1453 				}
1454 			}
1455 
1456 			/* clear the cnode error if putpage worked */
1457 			if ((error == 0) && cp->c_error) {
1458 				mutex_enter(&cp->c_statelock);
1459 				cp->c_error = 0;
1460 				mutex_exit(&cp->c_statelock);
1461 			}
1462 
1463 			if (error)
1464 				break;
1465 		}
1466 
1467 		/* if connected, sync the backvp */
1468 		if ((fscp->fs_cdconnected == CFS_CD_CONNECTED) &&
1469 		    cp->c_backvp) {
1470 			mutex_enter(&cp->c_statelock);
1471 			if (cp->c_backvp) {
1472 				error = VOP_FSYNC(cp->c_backvp, FSYNC, kcred,
1473 				    NULL);
1474 				if (CFS_TIMEOUT(fscp, error)) {
1475 					mutex_exit(&cp->c_statelock);
1476 					cachefs_cd_release(fscp);
1477 					held = 0;
1478 					cachefs_cd_timedout(fscp);
1479 					continue;
1480 				} else if (error && (error != EINTR))
1481 					cp->c_error = error;
1482 			}
1483 			mutex_exit(&cp->c_statelock);
1484 		}
1485 
1486 		/* sync the metadata and the front file to the front fs */
1487 		(void) cachefs_sync_metadata(cp);
1488 		break;
1489 	}
1490 
1491 	if (held)
1492 		cachefs_cd_release(fscp);
1493 }
1494 
1495 /*
1496  * Moves the specified file to the lost+found directory for the
1497  * cached file system.
1498  * Invalidates cached data and attributes.
1499  * Returns 0 or an error if could not perform operation.
1500  */
1501 int
1502 cachefs_cnode_lostfound(cnode_t *cp, char *rname)
1503 {
1504 	int error = 0;
1505 	fscache_t *fscp;
1506 	cachefscache_t *cachep;
1507 	char oname[CFS_FRONTFILE_NAME_SIZE];
1508 	filegrp_t *fgp;
1509 	char *namep, *strp;
1510 	char *namebuf = NULL;
1511 	vnode_t *nvp;
1512 	int index;
1513 	int len;
1514 
1515 	fscp = C_TO_FSCACHE(cp);
1516 	cachep = fscp->fs_cache;
1517 
1518 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1519 	ASSERT((cachep->c_flags & (CACHE_NOCACHE|CACHE_NOFILL)) == 0);
1520 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1521 
1522 	fgp = cp->c_filegrp;
1523 
1524 	/* set up the file group if necessary */
1525 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
1526 		error = filegrp_allocattr(fgp);
1527 		if (error)
1528 			goto out;
1529 	}
1530 	ASSERT(fgp->fg_dirvp);
1531 
1532 	namebuf = cachefs_kmem_alloc(MAXNAMELEN * 2, KM_SLEEP);
1533 
1534 	if ((cp->c_attr.va_type != VREG) ||
1535 	    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) ||
1536 	    ((cp->c_metadata.md_flags & MD_POPULATED) == 0) ||
1537 	    ((cp->c_metadata.md_flags & MD_FILE) == 0) ||
1538 	    (cp->c_metadata.md_rlno == 0)) {
1539 #ifdef CFSDEBUG
1540 		CFS_DEBUG(CFSDEBUG_CNODE)
1541 			printf("cachefs_cnode_lostfound cp %p cannot save\n",
1542 			    (void *)cp);
1543 #endif
1544 		error = EINVAL;
1545 		goto out;
1546 	}
1547 
1548 	/* lock out other users of the lost+found directory */
1549 	mutex_enter(&cachep->c_contentslock);
1550 
1551 	/* find a name we can use in lost+found */
1552 	if (rname)
1553 		namep = rname;
1554 	else
1555 		namep = "lostfile";
1556 	error = VOP_LOOKUP(cachep->c_lostfoundvp, namep, &nvp,
1557 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
1558 	if (error == 0)
1559 		VN_RELE(nvp);
1560 	if (error != ENOENT) {
1561 #define		MAXTRIES 1000
1562 		strp = namep;
1563 		for (index = 0; index < MAXTRIES; index++) {
1564 			(void) sprintf(namebuf, "%s.%" PRIx64, strp,
1565 			    gethrestime_sec() * cp->c_id.cid_fileno * index);
1566 			len = (int)strlen(namebuf) + 1;
1567 			if (len > MAXNAMELEN)
1568 				namep = &namebuf[len - MAXNAMELEN];
1569 			else
1570 				namep = namebuf;
1571 			error = VOP_LOOKUP(cachep->c_lostfoundvp, namep, &nvp,
1572 			    NULL, 0, NULL, kcred, NULL, NULL, NULL);
1573 			if (error == 0)
1574 				VN_RELE(nvp);
1575 			if (error == ENOENT)
1576 				break;
1577 		}
1578 		if (index == MAXTRIES) {
1579 			error = EIO;
1580 			mutex_exit(&cachep->c_contentslock);
1581 			goto out;
1582 		}
1583 	}
1584 
1585 	/* get the name of the front file */
1586 	make_ascii_name(&cp->c_id, oname);
1587 
1588 	/* rename the file into the lost+found directory */
1589 	error = VOP_RENAME(fgp->fg_dirvp, oname, cachep->c_lostfoundvp,
1590 	    namep, kcred, NULL, 0);
1591 	if (error) {
1592 		mutex_exit(&cachep->c_contentslock);
1593 		goto out;
1594 	}
1595 	mutex_exit(&cachep->c_contentslock);
1596 
1597 	/* copy out the new name */
1598 	if (rname)
1599 		(void) strcpy(rname, namep);
1600 
1601 out:
1602 	/* clean up */
1603 	cachefs_cnode_stale(cp);
1604 
1605 	if (namebuf)
1606 		cachefs_kmem_free(namebuf, MAXNAMELEN * 2);
1607 
1608 #if 0 /* XXX until we can put filesystem in read-only mode */
1609 	if (error) {
1610 		/* XXX put file system in read-only mode */
1611 	}
1612 #endif
1613 
1614 	return (error);
1615 }
1616 
1617 /*
1618  * Traverses the list of cnodes on the fscache and calls the
1619  * specified routine with the held cnode.
1620  */
1621 void
1622 cachefs_cnode_traverse(fscache_t *fscp, void (*routinep)(cnode_t *))
1623 {
1624 	filegrp_t *fgp, *ofgp;
1625 	cnode_t *cp, *ocp;
1626 	int index;
1627 
1628 	/* lock the fscache while we traverse the file groups */
1629 	mutex_enter(&fscp->fs_fslock);
1630 
1631 	/* for each bucket of file groups */
1632 	for (index = 0; index < CFS_FS_FGP_BUCKET_SIZE; index++) {
1633 		ofgp = NULL;
1634 
1635 		/* for each file group in a bucket */
1636 		for (fgp = fscp->fs_filegrp[index];
1637 		    fgp != NULL;
1638 		    fgp = fgp->fg_next) {
1639 
1640 			/* hold the file group */
1641 			filegrp_hold(fgp);
1642 
1643 			/* drop fscache lock so others can use it */
1644 			mutex_exit(&fscp->fs_fslock);
1645 
1646 			/* drop hold on previous file group */
1647 			if (ofgp)
1648 				filegrp_rele(ofgp);
1649 			ofgp = fgp;
1650 
1651 			/* lock the cnode list while we traverse it */
1652 			mutex_enter(&fgp->fg_cnodelock);
1653 			ocp = NULL;
1654 
1655 			/* for each cnode in this file group */
1656 			for (cp = fgp->fg_cnodelist;
1657 			    cp != NULL;
1658 			    cp = cp->c_next) {
1659 
1660 				/* hold the cnode */
1661 				VN_HOLD(CTOV(cp));
1662 
1663 				/* drop cnode list lock so others can use it */
1664 				mutex_exit(&fgp->fg_cnodelock);
1665 
1666 				/* drop hold on previous cnode */
1667 				if (ocp) {
1668 					VN_RELE(CTOV(ocp));
1669 				}
1670 				ocp = cp;
1671 
1672 				/*
1673 				 * Execute routine for this cnode.
1674 				 * At this point no locks are held.
1675 				 */
1676 				(routinep)(cp);
1677 
1678 				/* reacquire the cnode list lock */
1679 				mutex_enter(&fgp->fg_cnodelock);
1680 			}
1681 
1682 			/* drop cnode list lock */
1683 			mutex_exit(&fgp->fg_cnodelock);
1684 
1685 			/* drop hold on last cnode */
1686 			if (ocp) {
1687 				VN_RELE(CTOV(ocp));
1688 			}
1689 
1690 			/* reacquire the fscache lock */
1691 			mutex_enter(&fscp->fs_fslock);
1692 		}
1693 
1694 		/* drop hold on last file group */
1695 		if (ofgp)
1696 			filegrp_rele(ofgp);
1697 	}
1698 	mutex_exit(&fscp->fs_fslock);
1699 }
1700 
1701 void
1702 cachefs_cnode_disable_caching(struct cnode *cp)
1703 {
1704 	mutex_enter(&cp->c_statelock);
1705 	cp->c_flags |= CN_NOCACHE;
1706 	if (cp->c_frontvp != NULL) {
1707 		VN_RELE(cp->c_frontvp);
1708 		cp->c_frontvp = NULL;
1709 	}
1710 	mutex_exit(&cp->c_statelock);
1711 }
1712 
1713 #define	TIMEMATCH(a, b)	((a)->tv_sec == (b)->tv_sec && \
1714 	(a)->tv_nsec == (b)->tv_nsec)
1715 
1716 static void
1717 cnode_enable_caching(struct cnode *cp)
1718 {
1719 	struct vnode *iovp;
1720 	struct filegrp *fgp;
1721 	struct cachefs_metadata md;
1722 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
1723 	int error;
1724 
1725 	ASSERT((cachep->c_flags & (CACHE_NOFILL | CACHE_NOCACHE)) == 0);
1726 	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
1727 
1728 	iovp = NULL;
1729 	if (CTOV(cp)->v_type == VREG)
1730 		iovp = cp->c_backvp;
1731 	if (iovp) {
1732 		(void) VOP_PUTPAGE(iovp, (offset_t)0,
1733 		    (uint_t)0, B_INVAL, kcred, NULL);
1734 	}
1735 	mutex_enter(&cp->c_statelock);
1736 	if (cp->c_backvp) {
1737 		VN_RELE(cp->c_backvp);
1738 		cp->c_backvp = NULL;
1739 	}
1740 	fgp = cp->c_filegrp;
1741 	ASSERT(fgp);
1742 	error = filegrp_read_metadata(fgp, &cp->c_id, &md);
1743 	if (error == 0) {
1744 		if ((cachep->c_flags & CACHE_CHECK_RLTYPE) &&
1745 		    (md.md_rlno != 0) &&
1746 		    (md.md_rltype == CACHEFS_RL_ACTIVE)) {
1747 			rl_entry_t *rlp, rl;
1748 
1749 			mutex_enter(&cachep->c_contentslock);
1750 			error = cachefs_rl_entry_get(cachep, md.md_rlno, &rlp);
1751 			if (error) {
1752 				mutex_exit(&cachep->c_contentslock);
1753 				goto out;
1754 			}
1755 
1756 			rl = *rlp;
1757 			mutex_exit(&cachep->c_contentslock);
1758 
1759 			if (rl.rl_current != md.md_rltype) {
1760 				md.md_rltype = rl.rl_current;
1761 				cp->c_flags |= CN_UPDATED;
1762 			}
1763 		}
1764 
1765 		/*
1766 		 * A rudimentary consistency check
1767 		 * here.  If the cookie and mtime
1768 		 * from the cnode match those from the
1769 		 * cache metadata, we assume for now that
1770 		 * the cached data is OK.
1771 		 */
1772 		if (bcmp(&md.md_cookie.fid_data, &cp->c_cookie.fid_data,
1773 			(size_t)cp->c_cookie.fid_len) == 0 &&
1774 		    TIMEMATCH(&cp->c_attr.va_mtime, &md.md_vattr.va_mtime)) {
1775 			cp->c_metadata = md;
1776 		} else {
1777 			/*
1778 			 * Here we're skeptical about the validity of
1779 			 * the front file.
1780 			 * We'll keep the attributes already present in
1781 			 * the cnode, and bring along the parts of the
1782 			 * metadata that we need to eventually nuke this
1783 			 * bogus front file -- in inactive or getfrontfile,
1784 			 * whichever comes first...
1785 			 */
1786 			if (cp->c_frontvp != NULL) {
1787 				VN_RELE(cp->c_frontvp);
1788 				cp->c_frontvp = NULL;
1789 			}
1790 			cp->c_metadata.md_flags = md.md_flags;
1791 			cp->c_metadata.md_flags |= MD_NEEDATTRS;
1792 			cp->c_metadata.md_rlno = md.md_rlno;
1793 			cp->c_metadata.md_rltype = md.md_rltype;
1794 			cp->c_metadata.md_consttype = md.md_consttype;
1795 			cp->c_metadata.md_fid = md.md_fid;
1796 			cp->c_metadata.md_frontblks = md.md_frontblks;
1797 			cp->c_metadata.md_timestamp.tv_sec = 0;
1798 			cp->c_metadata.md_timestamp.tv_nsec = 0;
1799 			bzero(&cp->c_metadata.md_allocinfo,
1800 			    cp->c_metadata.md_allocents *
1801 			    sizeof (struct cachefs_allocmap));
1802 			cp->c_metadata.md_allocents = 0;
1803 			cp->c_metadata.md_flags &= ~MD_POPULATED;
1804 			if ((cp->c_metadata.md_rlno != 0) &&
1805 			    (cp->c_metadata.md_rltype == CACHEFS_RL_PACKED)) {
1806 				cachefs_rlent_moveto(cachep,
1807 				    CACHEFS_RL_PACKED_PENDING,
1808 				    cp->c_metadata.md_rlno,
1809 				    cp->c_metadata.md_frontblks);
1810 				cp->c_metadata.md_rltype =
1811 				    CACHEFS_RL_PACKED_PENDING;
1812 			}
1813 
1814 			cp->c_flags |= CN_UPDATED;
1815 #ifdef CFSDEBUG
1816 			CFS_DEBUG(CFSDEBUG_GENERAL) {
1817 				printf(
1818 				    "fileno %lld ignores cached data due "
1819 				    "to cookie and/or mtime mismatch\n",
1820 				    (longlong_t)cp->c_id.cid_fileno);
1821 			}
1822 #endif
1823 		}
1824 		if (cp->c_metadata.md_rltype == CACHEFS_RL_GC) {
1825 			cachefs_rlent_moveto(cachep, CACHEFS_RL_ACTIVE,
1826 			    cp->c_metadata.md_rlno,
1827 			    cp->c_metadata.md_frontblks);
1828 			cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
1829 			cp->c_flags |= CN_UPDATED;
1830 		}
1831 	}
1832 
1833 out:
1834 	cp->c_flags &= ~CN_NOCACHE;
1835 	mutex_exit(&cp->c_statelock);
1836 
1837 	(void) cachefs_pack_common(CTOV(cp), kcred);
1838 }
1839 
1840 void
1841 cachefs_enable_caching(struct fscache *fscp)
1842 {
1843 
1844 	/*
1845 	 * This function is only called when a remount occurs,
1846 	 * with "nocache" and "nofill" options configured
1847 	 * (currently these aren't supported). Since this
1848 	 * function can write into the cache, make sure that
1849 	 * its not in use with NFSv4.
1850 	 */
1851 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
1852 		return;
1853 
1854 	/*
1855 	 * set up file groups so we can read them.  Note that general
1856 	 * users (makecfsnode) will *not* start using them (i.e., all
1857 	 * newly created cnodes will be NOCACHE)
1858 	 * until we "enable_caching_rw" below.
1859 	 */
1860 	mutex_enter(&fscp->fs_fslock);
1861 	filegrp_list_enable_caching_ro(fscp);
1862 	mutex_exit(&fscp->fs_fslock);
1863 
1864 	cachefs_cnode_traverse(fscp, cnode_enable_caching);
1865 
1866 	/* enable general use of the filegrps */
1867 	mutex_enter(&fscp->fs_fslock);
1868 	filegrp_list_enable_caching_rw(fscp);
1869 	mutex_exit(&fscp->fs_fslock);
1870 }
1871 
1872 /*
1873  * This function makes a cnode stale by performing the following tasks:
1874  *	1) remove the front file
1875  *	2) Remove any resource file entries
1876  *	3) Remove any metadata entry from the attrcache file
1877  * 	4) Set the stale bit in the cnode flags field
1878  */
1879 void
1880 cachefs_cnode_stale(cnode_t *cp)
1881 {
1882 	fscache_t *fscp = C_TO_FSCACHE(cp);
1883 	struct cachefs_metadata *mdp;
1884 
1885 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1886 
1887 	/*
1888 	 * Remove a metadata entry if the file exists
1889 	 */
1890 	mdp = &cp->c_metadata;
1891 	if (mdp->md_rlno) {
1892 
1893 		ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1894 
1895 		/*
1896 		 * destroy the frontfile
1897 		 */
1898 		cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
1899 		/*
1900 		 * Remove resource file entry
1901 		 */
1902 		cachefs_rlent_moveto(fscp->fs_cache, CACHEFS_RL_FREE,
1903 		    mdp->md_rlno, 0);
1904 		mdp->md_rlno = 0;
1905 		mdp->md_rltype = CACHEFS_RL_NONE;
1906 	}
1907 
1908 	/*
1909 	 * Remove attrcache metadata
1910 	 */
1911 	if (CFS_ISFS_BACKFS_NFSV4(fscp) == 0)
1912 		(void) filegrp_destroy_metadata(cp->c_filegrp, &cp->c_id);
1913 	mdp->md_flags = 0;
1914 
1915 	if (cp->c_frontvp) {
1916 		VN_RELE(cp->c_frontvp);
1917 		cp->c_frontvp = NULL;
1918 	}
1919 
1920 	/*
1921 	 * For NFSv4 need to hang on to the backvp until vn_rele()
1922 	 * frees this cnode.
1923 	 */
1924 	if (cp->c_backvp && !CFS_ISFS_BACKFS_NFSV4(fscp)) {
1925 		VN_RELE(cp->c_backvp);
1926 		cp->c_backvp = NULL;
1927 	}
1928 	if (cp->c_acldirvp) {
1929 		VN_RELE(cp->c_acldirvp);
1930 		cp->c_acldirvp = NULL;
1931 	}
1932 
1933 	cp->c_flags |= CN_STALE | CN_ALLOC_PENDING | CN_NOCACHE;
1934 }
1935 
1936 /*
1937  * Sets up the local attributes in the metadata from the attributes.
1938  */
1939 void
1940 cachefs_cnode_setlocalstats(cnode_t *cp)
1941 {
1942 	fscache_t *fscp = C_TO_FSCACHE(cp);
1943 	cachefs_metadata_t *mdp = &cp->c_metadata;
1944 
1945 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1946 
1947 	/* allow over writing of local attributes if a remount occurred */
1948 	if (fscp->fs_info.fi_resettimes != mdp->md_resettimes) {
1949 		mdp->md_flags &= ~(MD_LOCALCTIME | MD_LOCALMTIME);
1950 		mdp->md_resettimes = fscp->fs_info.fi_resettimes;
1951 	}
1952 	if (fscp->fs_info.fi_resetfileno != mdp->md_resetfileno) {
1953 		mdp->md_flags &= ~MD_LOCALFILENO;
1954 		mdp->md_resetfileno = fscp->fs_info.fi_resetfileno;
1955 	}
1956 
1957 	/* overwrite old fileno and timestamps if not local versions */
1958 	if ((mdp->md_flags & MD_LOCALFILENO) == 0)
1959 		mdp->md_localfileno = mdp->md_vattr.va_nodeid;
1960 	if ((mdp->md_flags & MD_LOCALCTIME) == 0)
1961 		mdp->md_localctime = mdp->md_vattr.va_ctime;
1962 	if ((mdp->md_flags & MD_LOCALMTIME) == 0)
1963 		mdp->md_localmtime = mdp->md_vattr.va_mtime;
1964 	cp->c_flags |= CN_UPDATED;
1965 }
1966