xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_subr.c (revision 9584cebb1c69707f4c67306b661c2ed47d8676f1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/param.h>
27 #include <sys/types.h>
28 #include <sys/systm.h>
29 #include <sys/cred.h>
30 #include <sys/proc.h>
31 #include <sys/user.h>
32 #include <sys/vfs.h>
33 #include <sys/vnode.h>
34 #include <sys/pathname.h>
35 #include <sys/uio.h>
36 #include <sys/tiuser.h>
37 #include <sys/sysmacros.h>
38 #include <sys/kmem.h>
39 #include <sys/mount.h>
40 #include <sys/ioctl.h>
41 #include <sys/statvfs.h>
42 #include <sys/errno.h>
43 #include <sys/debug.h>
44 #include <sys/cmn_err.h>
45 #include <sys/utsname.h>
46 #include <sys/modctl.h>
47 #include <sys/file.h>
48 #include <sys/stat.h>
49 #include <sys/fcntl.h>
50 #include <sys/fbuf.h>
51 #include <sys/dnlc.h>
52 #include <sys/callb.h>
53 #include <sys/kobj.h>
54 #include <sys/rwlock.h>
55 
56 #include <sys/vmsystm.h>
57 #include <vm/hat.h>
58 #include <vm/as.h>
59 #include <vm/page.h>
60 #include <vm/pvn.h>
61 #include <vm/seg.h>
62 #include <vm/seg_map.h>
63 #include <vm/seg_vn.h>
64 #include <vm/rm.h>
65 #include <sys/fs/cachefs_fs.h>
66 #include <sys/fs/cachefs_log.h>
67 #include <sys/fs/cachefs_dir.h>
68 
/* segkmap is defined outside this file */
extern struct seg *segkmap;
/* old-style (unprototyped) declarations of the segmap routines */
caddr_t segmap_getmap();
int segmap_release();

/*
 * Defined outside this file.  NOTE(review): judging by the names these
 * are the two ends of the cnode free list and the list of caches --
 * confirm against the defining file.
 */
extern struct cnode *cachefs_freeback;
extern struct cnode *cachefs_freefront;
extern cachefscache_t *cachefs_cachelist;

#ifdef CFSDEBUG
/* only present in CFSDEBUG builds; starts out as zero */
int cachefsdebug = 0;
#endif

/* initialized from the compile-time constant CFS_MAX_THREADS */
int cachefs_max_threads = CFS_MAX_THREADS;
ino64_t cachefs_check_fileno = 0;
/* kmem cache that cachefscache_t objects are allocated from and freed to */
struct kmem_cache *cachefs_cache_kmcache = NULL;
/* kmem cache that struct cachefs_req work requests are allocated from */
struct kmem_cache *cachefs_req_cache = NULL;

/* forward declaration of a file-local routine */
static int
cachefs_async_populate_reg(struct cachefs_populate_req *, cred_t *,
    vnode_t *, vnode_t *);
89 
90 /*
91  * Cache routines
92  */
93 
94 /*
95  * ------------------------------------------------------------------
96  *
97  *		cachefs_cache_create
98  *
99  * Description:
100  *	Creates a cachefscache_t object and initializes it to
101  *	be NOCACHE and NOFILL mode.
102  * Arguments:
103  * Returns:
104  *	Returns a pointer to the created object or NULL if
105  *	threads could not be created.
106  * Preconditions:
107  */
108 
109 cachefscache_t *
110 cachefs_cache_create(void)
111 {
112 	cachefscache_t *cachep;
113 	struct cachefs_req *rp;
114 
115 	/* allocate zeroed memory for the object */
116 	cachep = kmem_cache_alloc(cachefs_cache_kmcache, KM_SLEEP);
117 
118 	bzero(cachep, sizeof (*cachep));
119 
120 	cv_init(&cachep->c_cwcv, NULL, CV_DEFAULT, NULL);
121 	cv_init(&cachep->c_cwhaltcv, NULL, CV_DEFAULT, NULL);
122 	mutex_init(&cachep->c_contentslock, NULL, MUTEX_DEFAULT, NULL);
123 	mutex_init(&cachep->c_fslistlock, NULL, MUTEX_DEFAULT, NULL);
124 	mutex_init(&cachep->c_log_mutex, NULL, MUTEX_DEFAULT, NULL);
125 
126 	/* set up the work queue and get the sync thread created */
127 	cachefs_workq_init(&cachep->c_workq);
128 	cachep->c_workq.wq_keepone = 1;
129 	cachep->c_workq.wq_cachep = cachep;
130 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
131 	rp->cfs_cmd = CFS_NOOP;
132 	rp->cfs_cr = kcred;
133 	rp->cfs_req_u.cu_fs_sync.cf_cachep = cachep;
134 	crhold(rp->cfs_cr);
135 	cachefs_addqueue(rp, &cachep->c_workq);
136 	cachep->c_flags |= CACHE_NOCACHE | CACHE_NOFILL | CACHE_ALLOC_PENDING;
137 
138 	return (cachep);
139 }
140 
141 /*
142  * ------------------------------------------------------------------
143  *
144  *		cachefs_cache_destroy
145  *
146  * Description:
147  *	Destroys the cachefscache_t object.
148  * Arguments:
149  *	cachep	the cachefscache_t object to destroy
150  * Returns:
151  * Preconditions:
152  *	precond(cachep)
153  */
154 
155 void
156 cachefs_cache_destroy(cachefscache_t *cachep)
157 {
158 	int error = 0;
159 #ifdef CFSRLDEBUG
160 	uint_t index;
161 #endif /* CFSRLDEBUG */
162 	clock_t wakeup = (60 * hz);
163 
164 	/* stop async threads */
165 	while (cachep->c_workq.wq_thread_count > 0)
166 		(void) cachefs_async_halt(&cachep->c_workq, 1);
167 
168 	/* kill off the cachep worker thread */
169 	mutex_enter(&cachep->c_contentslock);
170 	while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
171 		cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
172 		cv_signal(&cachep->c_cwcv);
173 		(void) cv_reltimedwait(&cachep->c_cwhaltcv,
174 		    &cachep->c_contentslock, wakeup, TR_CLOCK_TICK);
175 	}
176 
177 	if ((cachep->c_flags & CACHE_ALLOC_PENDING) == 0) {
178 		cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
179 		(void) cachefs_cache_rssync(cachep);
180 	}
181 	mutex_exit(&cachep->c_contentslock);
182 
183 	/* if there is a cache */
184 	if ((cachep->c_flags & CACHE_NOCACHE) == 0) {
185 		if ((cachep->c_flags & CACHE_NOFILL) == 0) {
186 #ifdef CFSRLDEBUG
187 			/* blow away dangling rl debugging info */
188 			mutex_enter(&cachep->c_contentslock);
189 			for (index = 0;
190 			    index <= cachep->c_rlinfo.rl_entries;
191 			    index++) {
192 				rl_entry_t *rlent;
193 
194 				error = cachefs_rl_entry_get(cachep, index,
195 				    rlent);
196 				/*
197 				 * Since we are destroying the cache,
198 				 * better to ignore and proceed
199 				 */
200 				if (error)
201 					break;
202 				cachefs_rl_debug_destroy(rlent);
203 			}
204 			mutex_exit(&cachep->c_contentslock);
205 #endif /* CFSRLDEBUG */
206 
207 			/* sync the cache */
208 			if (!error)
209 				cachefs_cache_sync(cachep);
210 		} else {
211 			/* get rid of any unused fscache objects */
212 			mutex_enter(&cachep->c_fslistlock);
213 			fscache_list_gc(cachep);
214 			mutex_exit(&cachep->c_fslistlock);
215 		}
216 		ASSERT(cachep->c_fslist == NULL);
217 
218 		VN_RELE(cachep->c_resfilevp);
219 		VN_RELE(cachep->c_dirvp);
220 		VN_RELE(cachep->c_lockvp);
221 		VN_RELE(cachep->c_lostfoundvp);
222 	}
223 
224 	if (cachep->c_log_ctl != NULL)
225 		cachefs_kmem_free(cachep->c_log_ctl,
226 		    sizeof (cachefs_log_control_t));
227 	if (cachep->c_log != NULL)
228 		cachefs_log_destroy_cookie(cachep->c_log);
229 
230 	cv_destroy(&cachep->c_cwcv);
231 	cv_destroy(&cachep->c_cwhaltcv);
232 	mutex_destroy(&cachep->c_contentslock);
233 	mutex_destroy(&cachep->c_fslistlock);
234 	mutex_destroy(&cachep->c_log_mutex);
235 
236 	kmem_cache_free(cachefs_cache_kmcache, cachep);
237 }
238 
/*
 * ------------------------------------------------------------------
 *
 *		cachefs_cache_activate_ro
 *
 * Description:
 *	Activates the cachefscache_t object for a read-only file system.
 *	Checks the cache directory (mode bits, lock file, label and
 *	version, resource file, clean flag, lost+found), stores held
 *	vnodes for those files in the cache object, starts the cachep
 *	worker thread and sets up the logging control structure.
 *	On success CACHE_NOCACHE and CACHE_ALLOC_PENDING are cleared.
 * Arguments:
 *	cachep	the cachefscache_t object to activate
 *	cdvp	the vnode of the cache directory
 * Returns:
 *	Returns 0 for success, !0 if there is a problem with the cache.
 * Preconditions:
 *	precond(cachep)
 *	precond(cdvp)
 *	precond(cachep->c_flags & CACHE_NOCACHE)
 */

int
cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp)
{
	cachefs_log_control_t *lc;
	vnode_t *labelvp = NULL;
	vnode_t *rifvp = NULL;
	vnode_t *lockvp = NULL;
	vnode_t *statevp = NULL;
	vnode_t *lostfoundvp = NULL;
	struct vattr *attrp = NULL;
	int error;

	ASSERT(cachep->c_flags & CACHE_NOCACHE);
	/* c_contentslock is held for the whole activation */
	mutex_enter(&cachep->c_contentslock);

	attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);

	/* get the mode bits of the cache directory */
	attrp->va_mask = AT_ALL;
	error = VOP_GETATTR(cdvp, attrp, 0, kcred, NULL);
	if (error)
		goto out;

	/* ensure the mode bits are 000 to keep out casual users */
	if (attrp->va_mode & S_IAMB) {
		cmn_err(CE_WARN, "cachefs: Cache Directory Mode must be 000\n");
		error = EPERM;
		goto out;
	}

	/* Get the lock file */
	error = VOP_LOOKUP(cdvp, CACHEFS_LOCK_FILE, &lockvp, NULL, 0, NULL,
	    kcred, NULL, NULL, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_a: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/* Get the label file */
	error = VOP_LOOKUP(cdvp, CACHELABEL_NAME, &labelvp, NULL, 0, NULL,
	    kcred, NULL, NULL, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_b: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/* read in the label */
	error = vn_rdwr(UIO_READ, labelvp, (caddr_t)&cachep->c_label,
	    sizeof (struct cache_label), 0LL, UIO_SYSSPACE,
	    0, (rlim64_t)0, kcred, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_c: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/* Verify that we can handle the version this cache was created under */
	if (cachep->c_label.cl_cfsversion != CFSVERSION) {
		cmn_err(CE_WARN, "cachefs: Invalid Cache Version, run fsck\n");
		error = EINVAL;
		goto out;
	}

	/* Open the resource file */
	error = VOP_LOOKUP(cdvp, RESOURCE_NAME, &rifvp, NULL, 0, NULL, kcred,
	    NULL, NULL, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_d: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/*  Read the usage struct for this cache */
	error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_usage,
	    sizeof (struct cache_usage), 0LL, UIO_SYSSPACE, 0,
	    (rlim64_t)0, kcred, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_e: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/* a cache still marked active on disk was not shut down cleanly */
	if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
		cmn_err(CE_WARN, "cachefs: cache not clean.  Run fsck\n");
		/* ENOSPC is what UFS uses for clean flag check */
		error = ENOSPC;
		goto out;
	}

	/*  Read the rlinfo for this cache (stored right after the usage) */
	error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_rlinfo,
	    sizeof (cachefs_rl_info_t), (offset_t)sizeof (struct cache_usage),
	    UIO_SYSSPACE, 0, 0, kcred, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_f: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/* Open the lost+found directory */
	error = VOP_LOOKUP(cdvp, CACHEFS_LOSTFOUND_NAME, &lostfoundvp,
	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if (error) {
		cmn_err(CE_WARN, "cachefs: activate_g: cache corruption"
		    " run fsck.\n");
		goto out;
	}

	/*
	 * Take an extra hold for each vnode stored in the cache object;
	 * the lookup references on the local variables are dropped at
	 * out: below.
	 */
	VN_HOLD(rifvp);
	VN_HOLD(cdvp);
	VN_HOLD(lockvp);
	VN_HOLD(lostfoundvp);
	cachep->c_resfilevp = rifvp;
	cachep->c_dirvp = cdvp;
	cachep->c_lockvp = lockvp;
	cachep->c_lostfoundvp = lostfoundvp;

	/* get the cachep worker thread created */
	cachep->c_flags |= CACHE_CACHEW_THREADRUN;
	(void) thread_create(NULL, 0, cachefs_cachep_worker_thread,
	    cachep, 0, &p0, TS_RUN, minclsyspri);

	/* allocate the `logging control' field */
	mutex_enter(&cachep->c_log_mutex);
	cachep->c_log_ctl =
	    cachefs_kmem_zalloc(sizeof (cachefs_log_control_t), KM_SLEEP);
	lc = (cachefs_log_control_t *)cachep->c_log_ctl;

	/* if the LOG_STATUS_NAME file exists, read it in and set up logging */
	error = VOP_LOOKUP(cachep->c_dirvp, LOG_STATUS_NAME, &statevp,
	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
	if (error == 0) {
		int vnrw_error;

		vnrw_error = vn_rdwr(UIO_READ, statevp, (caddr_t)lc,
		    sizeof (*lc), 0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY,
		    kcred, NULL);
		VN_RELE(statevp);

		/* logging problems are reported but do not fail activation */
		if (vnrw_error == 0) {
			if ((cachep->c_log = cachefs_log_create_cookie(lc))
			    == NULL)
				cachefs_log_error(cachep, ENOMEM, 0);
			else if ((lc->lc_magic != CACHEFS_LOG_MAGIC) ||
			    (lc->lc_path[0] != '/') ||
			    (cachefs_log_logfile_open(cachep,
			    lc->lc_path) != 0))
				cachefs_log_error(cachep, EINVAL, 0);
		}
	} else {
		/* a missing log status file is not an activation failure */
		error = 0;
	}
	lc->lc_magic = CACHEFS_LOG_MAGIC;
	lc->lc_cachep = (uint64_t)(uintptr_t)cachep;
	mutex_exit(&cachep->c_log_mutex);

out:
	/* common exit: publish success, then release local resources */
	if (error == 0) {
		cachep->c_flags &= ~(CACHE_NOCACHE | CACHE_ALLOC_PENDING);
	}
	if (attrp)
		cachefs_kmem_free(attrp, sizeof (struct vattr));
	if (labelvp != NULL)
		VN_RELE(labelvp);
	if (rifvp != NULL)
		VN_RELE(rifvp);
	if (lockvp)
		VN_RELE(lockvp);
	if (lostfoundvp)
		VN_RELE(lostfoundvp);

	mutex_exit(&cachep->c_contentslock);
	return (error);
}
433 
/*
 * ------------------------------------------------------------------
 *
 *		cachefs_stop_cache
 *
 * Description:
 *	Puts the cache that the cnode belongs to back into NOCACHE
 *	mode: halts the async work queues, disables caching on the
 *	cnodes of the fscache, releases the front file system vnodes
 *	held by the file groups, the fscache and the cache, stops the
 *	cachep worker thread, frees the logging state and finally
 *	sets CACHE_NOCACHE.
 * Arguments:
 *	cp	a cnode in the file system whose cache is to be stopped
 * Returns:
 *	Returns 0 if the cache is (or already was) in NOCACHE mode or
 *	the back file system is NFSv4, EINVAL if the cache is not in
 *	NOFILL mode, or the error from cachefs_cd_access().
 * Preconditions:
 *	precond(cp)
 */

int
cachefs_stop_cache(cnode_t *cp)
{
	fscache_t *fscp = C_TO_FSCACHE(cp);
	cachefscache_t *cachep = fscp->fs_cache;
	filegrp_t *fgp;
	int i;
	int error = 0;
	clock_t wakeup = (60 * hz);

	/* XXX verify lock-ordering for this function */

	mutex_enter(&cachep->c_contentslock);

	/*
	 * no work if we're already in nocache mode.  hopefully this
	 * will be the usual case.
	 */

	if (cachep->c_flags & CACHE_NOCACHE) {
		mutex_exit(&cachep->c_contentslock);
		return (0);
	}

	/* only a cache that is still in NOFILL mode may be stopped */
	if ((cachep->c_flags & CACHE_NOFILL) == 0) {
		mutex_exit(&cachep->c_contentslock);
		return (EINVAL);
	}

	mutex_exit(&cachep->c_contentslock);

	/* We are already not caching if nfsv4 */
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
		return (0);
	}

#ifdef CFSDEBUG
	/* debug builds check that fscp is the only fscache on the list */
	mutex_enter(&cachep->c_fslistlock);
	ASSERT(fscp == cachep->c_fslist);
	ASSERT(fscp->fs_next == NULL);
	mutex_exit(&cachep->c_fslistlock);

	printf("cachefs_stop_cache: resetting CACHE_NOCACHE\n");
#endif

	/* XXX should i worry about disconnected during boot? */
	error = cachefs_cd_access(fscp, 1, 1);
	if (error)
		goto out;

	error = cachefs_async_halt(&fscp->fs_workq, 1);
	ASSERT(error == 0);
	error = cachefs_async_halt(&cachep->c_workq, 1);
	ASSERT(error == 0);
	/* sigh -- best to keep going if async_halt failed. */
	error = 0;

	/* XXX current order: cnode, fgp, fscp, cache. okay? */

	cachefs_cnode_traverse(fscp, cachefs_cnode_disable_caching);

	/* walk every file group in every hash bucket */
	for (i = 0; i < CFS_FS_FGP_BUCKET_SIZE; i++) {
		for (fgp = fscp->fs_filegrp[i]; fgp != NULL;
		    fgp = fgp->fg_next) {
			mutex_enter(&fgp->fg_mutex);

			ASSERT((fgp->fg_flags &
			    (CFS_FG_WRITE | CFS_FG_UPDATED)) == 0);
			/* mark the file group's files as needing allocation */
			fgp->fg_flags |=
			    CFS_FG_ALLOC_FILE |
			    CFS_FG_ALLOC_ATTR;
			fgp->fg_flags &= ~CFS_FG_READ;

			/* drop the holds on the group's directory/attr file */
			if (fgp->fg_dirvp) {
				fgp->fg_flags |= CFS_FG_ALLOC_FILE;
				VN_RELE(fgp->fg_dirvp);
				fgp->fg_dirvp = NULL;
			}
			if (fgp->fg_attrvp) {
				fgp->fg_flags |= CFS_FG_ALLOC_ATTR;
				VN_RELE(fgp->fg_attrvp);
				fgp->fg_attrvp = NULL;
			}

			mutex_exit(&fgp->fg_mutex);
		}
	}

	/* release the vnodes held by the fscache itself */
	mutex_enter(&fscp->fs_fslock);
	ASSERT((fscp->fs_flags & (CFS_FS_WRITE)) == 0);
	fscp->fs_flags &= ~(CFS_FS_READ | CFS_FS_DIRTYINFO);

	if (fscp->fs_fscdirvp) {
		VN_RELE(fscp->fs_fscdirvp);
		fscp->fs_fscdirvp = NULL;
	}
	if (fscp->fs_fsattrdir) {
		VN_RELE(fscp->fs_fsattrdir);
		fscp->fs_fsattrdir = NULL;
	}
	if (fscp->fs_infovp) {
		VN_RELE(fscp->fs_infovp);
		fscp->fs_infovp = NULL;
	}
	/* XXX dlog stuff? */

	mutex_exit(&fscp->fs_fslock);

	/*
	 * release resources grabbed in cachefs_cache_activate_ro
	 */

	mutex_enter(&cachep->c_contentslock);

	/* kill off the cachep worker thread */
	while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
		cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
		cv_signal(&cachep->c_cwcv);
		(void) cv_reltimedwait(&cachep->c_cwhaltcv,
		    &cachep->c_contentslock, wakeup, TR_CLOCK_TICK);
	}

	if (cachep->c_resfilevp) {
		VN_RELE(cachep->c_resfilevp);
		cachep->c_resfilevp = NULL;
	}
	if (cachep->c_dirvp) {
		VN_RELE(cachep->c_dirvp);
		cachep->c_dirvp = NULL;
	}
	if (cachep->c_lockvp) {
		VN_RELE(cachep->c_lockvp);
		cachep->c_lockvp = NULL;
	}
	if (cachep->c_lostfoundvp) {
		VN_RELE(cachep->c_lostfoundvp);
		cachep->c_lostfoundvp = NULL;
	}

	/* c_log_mutex is taken while c_contentslock is held */
	mutex_enter(&cachep->c_log_mutex);
	if (cachep->c_log_ctl) {
		cachefs_kmem_free(cachep->c_log_ctl,
		    sizeof (cachefs_log_control_t));
		cachep->c_log_ctl = NULL;
	}
	if (cachep->c_log) {
		cachefs_log_destroy_cookie(cachep->c_log);
		cachep->c_log = NULL;
	}
	mutex_exit(&cachep->c_log_mutex);

	/* XXX do what mountroot_init does when ! foundcache */

	cachep->c_flags |= CACHE_NOCACHE;
	mutex_exit(&cachep->c_contentslock);

	/* XXX should i release this here? */
	cachefs_cd_release(fscp);

out:

	return (error);
}
597 
598 /*
599  * ------------------------------------------------------------------
600  *
601  *		cachefs_cache_active_rw
602  *
603  * Description:
604  *	Activates the cachefscache_t object for a read-write file system.
605  * Arguments:
606  *	cachep	the cachefscache_t object to activate
607  * Returns:
608  * Preconditions:
609  *	precond(cachep)
610  *	precond((cachep->c_flags & CACHE_NOCACHE) == 0)
611  *	precond(cachep->c_flags & CACHE_NOFILL)
612  */
613 
614 void
615 cachefs_cache_activate_rw(cachefscache_t *cachep)
616 {
617 	cachefs_rl_listhead_t *lhp;
618 
619 	ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
620 	ASSERT(cachep->c_flags & CACHE_NOFILL);
621 
622 	mutex_enter(&cachep->c_contentslock);
623 	cachep->c_flags &= ~CACHE_NOFILL;
624 
625 	/* move the active list to the rl list */
626 	cachefs_rl_cleanup(cachep);
627 
628 	lhp = &cachep->c_rlinfo.rl_items[
629 	    CACHEFS_RL_INDEX(CACHEFS_RL_PACKED_PENDING)];
630 	if (lhp->rli_itemcnt != 0)
631 		cachep->c_flags |= CACHE_PACKED_PENDING;
632 	cachefs_cache_dirty(cachep, 0);
633 	mutex_exit(&cachep->c_contentslock);
634 }
635 
636 /*
637  * ------------------------------------------------------------------
638  *
639  *		cachefs_cache_dirty
640  *
641  * Description:
642  *	Marks the cache as dirty (active).
643  * Arguments:
644  *	cachep	the cachefscache_t to mark as dirty
645  *	lockit	1 means grab contents lock, 0 means caller grabbed it
646  * Returns:
647  * Preconditions:
648  *	precond(cachep)
649  *	precond(cache is in rw mode)
650  */
651 
652 void
653 cachefs_cache_dirty(struct cachefscache *cachep, int lockit)
654 {
655 	int error;
656 
657 	ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL)) == 0);
658 
659 	if (lockit) {
660 		mutex_enter(&cachep->c_contentslock);
661 	} else {
662 		ASSERT(MUTEX_HELD(&cachep->c_contentslock));
663 	}
664 	if (cachep->c_flags & CACHE_DIRTY) {
665 		ASSERT(cachep->c_usage.cu_flags & CUSAGE_ACTIVE);
666 	} else {
667 		/*
668 		 * turn on the "cache active" (dirty) flag and write it
669 		 * synchronously to disk
670 		 */
671 		cachep->c_flags |= CACHE_DIRTY;
672 		cachep->c_usage.cu_flags |= CUSAGE_ACTIVE;
673 		if (error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
674 		    (caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
675 		    0LL, UIO_SYSSPACE, FSYNC, (rlim64_t)RLIM_INFINITY,
676 		    kcred, NULL)) {
677 			cmn_err(CE_WARN,
678 			    "cachefs: clean flag write error: %d\n", error);
679 		}
680 	}
681 
682 	if (lockit)
683 		mutex_exit(&cachep->c_contentslock);
684 }
685 
686 /*
687  * ------------------------------------------------------------------
688  *
689  *		cachefs_cache_rssync
690  *
691  * Description:
692  *	Syncs out the resource file for the cachefscache_t object.
693  * Arguments:
694  *	cachep	the cachefscache_t object to operate on
695  * Returns:
696  *	Returns 0 for success, !0 on an error writing data.
697  * Preconditions:
698  *	precond(cachep)
699  *	precond(cache is in rw mode)
700  */
701 
702 int
703 cachefs_cache_rssync(struct cachefscache *cachep)
704 {
705 	int error;
706 
707 	ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL |
708 	    CACHE_ALLOC_PENDING)) == 0);
709 
710 	if (cachep->c_rl_entries != NULL) {
711 		error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
712 		    (caddr_t)cachep->c_rl_entries, MAXBSIZE,
713 		    (offset_t)((cachep->c_rl_window + 1) * MAXBSIZE),
714 		    UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
715 		if (error)
716 			cmn_err(CE_WARN,
717 			    "cachefs: Can't Write rl entries Info\n");
718 		cachefs_kmem_free(cachep->c_rl_entries, MAXBSIZE);
719 		cachep->c_rl_entries = NULL;
720 	}
721 
722 	/* write the usage struct for this cache */
723 	error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
724 	    (caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
725 	    0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
726 	if (error) {
727 		cmn_err(CE_WARN, "cachefs: Can't Write Cache Usage Info\n");
728 	}
729 
730 	/* write the rlinfo for this cache */
731 	error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
732 	    (caddr_t)&cachep->c_rlinfo, sizeof (cachefs_rl_info_t),
733 	    (offset_t)sizeof (struct cache_usage), UIO_SYSSPACE,
734 	    0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
735 	if (error) {
736 		cmn_err(CE_WARN, "cachefs: Can't Write Cache RL Info\n");
737 	}
738 	error = VOP_FSYNC(cachep->c_resfilevp, FSYNC, kcred, NULL);
739 	return (error);
740 }
741 
742 /*
743  * ------------------------------------------------------------------
744  *
745  *		cachefs_cache_sync
746  *
747  * Description:
748  *	Sync a cache which includes all of its fscaches.
749  * Arguments:
750  *	cachep	the cachefscache_t object to sync
751  * Returns:
752  * Preconditions:
753  *	precond(cachep)
754  *	precond(cache is in rw mode)
755  */
756 
757 void
758 cachefs_cache_sync(struct cachefscache *cachep)
759 {
760 	struct fscache *fscp;
761 	struct fscache **syncfsc;
762 	int nfscs, fscidx;
763 	int try;
764 	int done;
765 
766 	if (cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL))
767 		return;
768 
769 	done = 0;
770 	for (try = 0; (try < 2) && !done; try++) {
771 
772 		nfscs = 0;
773 
774 		/*
775 		 * here we turn off the cache-wide DIRTY flag.  If it's still
776 		 * off when the sync completes we can write the clean flag to
777 		 * disk telling fsck it has no work to do.
778 		 */
779 #ifdef CFSCLEANFLAG
780 		mutex_enter(&cachep->c_contentslock);
781 		cachep->c_flags &= ~CACHE_DIRTY;
782 		mutex_exit(&cachep->c_contentslock);
783 #endif /* CFSCLEANFLAG */
784 
785 		cachefs_log_process_queue(cachep, 1);
786 
787 		mutex_enter(&cachep->c_fslistlock);
788 		syncfsc = cachefs_kmem_alloc(
789 		    cachep->c_refcnt * sizeof (struct fscache *), KM_SLEEP);
790 		for (fscp = cachep->c_fslist; fscp; fscp = fscp->fs_next) {
791 			fscache_hold(fscp);
792 			ASSERT(nfscs < cachep->c_refcnt);
793 			syncfsc[nfscs++] = fscp;
794 		}
795 		ASSERT(nfscs == cachep->c_refcnt);
796 		mutex_exit(&cachep->c_fslistlock);
797 		for (fscidx = 0; fscidx < nfscs; fscidx++) {
798 			fscp = syncfsc[fscidx];
799 			fscache_sync(fscp);
800 			fscache_rele(fscp);
801 		}
802 
803 		/* get rid of any unused fscache objects */
804 		mutex_enter(&cachep->c_fslistlock);
805 		fscache_list_gc(cachep);
806 		mutex_exit(&cachep->c_fslistlock);
807 
808 		/*
809 		 * here we check the cache-wide DIRTY flag.
810 		 * If it's off,
811 		 * we can write the clean flag to disk.
812 		 */
813 #ifdef CFSCLEANFLAG
814 		mutex_enter(&cachep->c_contentslock);
815 		if ((cachep->c_flags & CACHE_DIRTY) == 0) {
816 			if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
817 				cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
818 				if (cachefs_cache_rssync(cachep) == 0) {
819 					done = 1;
820 				} else {
821 					cachep->c_usage.cu_flags |=
822 					    CUSAGE_ACTIVE;
823 				}
824 			} else {
825 				done = 1;
826 			}
827 		}
828 		mutex_exit(&cachep->c_contentslock);
829 #else /* CFSCLEANFLAG */
830 		mutex_enter(&cachep->c_contentslock);
831 		(void) cachefs_cache_rssync(cachep);
832 		mutex_exit(&cachep->c_contentslock);
833 		done = 1;
834 #endif /* CFSCLEANFLAG */
835 		cachefs_kmem_free(syncfsc, nfscs * sizeof (struct fscache *));
836 	}
837 }
838 
839 /*
840  * ------------------------------------------------------------------
841  *
842  *		cachefs_cache_unique
843  *
844  * Description:
845  * Arguments:
846  * Returns:
847  *	Returns a unique number.
848  * Preconditions:
849  *	precond(cachep)
850  */
851 
852 uint_t
853 cachefs_cache_unique(cachefscache_t *cachep)
854 {
855 	uint_t unique = 0;
856 	int error = 0;
857 
858 	mutex_enter(&cachep->c_contentslock);
859 	if (cachep->c_usage.cu_flags & CUSAGE_NEED_ADJUST ||
860 	    ++(cachep->c_unique) == 0) {
861 		cachep->c_usage.cu_unique++;
862 
863 		if (cachep->c_unique == 0)
864 			cachep->c_unique = 1;
865 		cachep->c_flags &= ~CUSAGE_NEED_ADJUST;
866 		error = cachefs_cache_rssync(cachep);
867 	}
868 	if (error == 0)
869 		unique = (cachep->c_usage.cu_unique << 16) + cachep->c_unique;
870 	mutex_exit(&cachep->c_contentslock);
871 	return (unique);
872 }
873 
874 /*
875  * Called from c_getfrontfile. Shouldn't be called from anywhere else !
876  */
877 static int
878 cachefs_createfrontfile(cnode_t *cp, struct filegrp *fgp)
879 {
880 	char name[CFS_FRONTFILE_NAME_SIZE];
881 	struct vattr *attrp = NULL;
882 	int error = 0;
883 	int mode;
884 	int alloc = 0;
885 	int freefile = 0;
886 	int ffrele = 0;
887 	int rlfree = 0;
888 	rl_entry_t rl_ent;
889 
890 #ifdef CFSDEBUG
891 	CFS_DEBUG(CFSDEBUG_FRONT)
892 		printf("c_createfrontfile: ENTER cp %p fgp %p\n",
893 		    (void *)cp, (void *)fgp);
894 #endif
895 
896 	ASSERT(cp->c_frontvp == NULL);
897 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
898 
899 	/* quit if we cannot write to the filegrp */
900 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0) {
901 		error = ENOENT;
902 		goto out;
903 	}
904 
905 	/* find or create the filegrp attrcache file if necessary */
906 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
907 		error = filegrp_allocattr(fgp);
908 		if (error)
909 			goto out;
910 	}
911 
912 	make_ascii_name(&cp->c_id, name);
913 
914 	/* set up attributes for the front file we want to create */
915 	attrp = cachefs_kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
916 	alloc++;
917 	attrp->va_mode = S_IFREG | 0666;
918 	mode = 0666;
919 	attrp->va_uid = 0;
920 	attrp->va_gid = 0;
921 	attrp->va_type = VREG;
922 	attrp->va_size = 0;
923 	attrp->va_mask = AT_SIZE | AT_TYPE | AT_MODE | AT_UID | AT_GID;
924 
925 	/* get a file from the resource counts */
926 	error = cachefs_allocfile(fgp->fg_fscp->fs_cache);
927 	if (error) {
928 		error = EINVAL;
929 		goto out;
930 	}
931 	freefile++;
932 
933 	/* create the metadata slot if necessary */
934 	if (cp->c_flags & CN_ALLOC_PENDING) {
935 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
936 		    &cp->c_id);
937 		if (error) {
938 			error = EINVAL;
939 			goto out;
940 		}
941 		cp->c_flags &= ~CN_ALLOC_PENDING;
942 		cp->c_flags |= CN_UPDATED;
943 	}
944 
945 	/* get an rl entry if necessary */
946 	if (cp->c_metadata.md_rlno == 0) {
947 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
948 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
949 		rl_ent.rl_fsid = fgp->fg_fscp->fs_cfsid;
950 		rl_ent.rl_attrc = 0;
951 		error = cachefs_rl_alloc(fgp->fg_fscp->fs_cache, &rl_ent,
952 		    &cp->c_metadata.md_rlno);
953 		if (error)
954 			goto out;
955 		cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
956 		    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
957 		    cp->c_metadata.md_frontblks);
958 		cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
959 		rlfree++;
960 		cp->c_flags |= CN_UPDATED; /* XXX sam: do we need this? */
961 
962 		/* increment number of front files */
963 		error = filegrp_ffhold(fgp);
964 		if (error) {
965 			error = EINVAL;
966 			goto out;
967 		}
968 		ffrele++;
969 	}
970 
971 	if (cp->c_flags & CN_ASYNC_POP_WORKING) {
972 		/* lookup the already created front file */
973 		error = VOP_LOOKUP(fgp->fg_dirvp, name, &cp->c_frontvp,
974 		    NULL, 0, NULL, kcred, NULL, NULL, NULL);
975 	} else {
976 		/* create the front file */
977 		error = VOP_CREATE(fgp->fg_dirvp, name, attrp, EXCL, mode,
978 		    &cp->c_frontvp, kcred, 0, NULL, NULL);
979 	}
980 	if (error) {
981 #ifdef CFSDEBUG
982 		CFS_DEBUG(CFSDEBUG_FRONT)
983 			printf("c_createfrontfile: Can't create cached object"
984 			    " error %u, fileno %llx\n", error,
985 			    (u_longlong_t)cp->c_id.cid_fileno);
986 #endif
987 		goto out;
988 	}
989 
990 	/* get a copy of the fid of the front file */
991 	cp->c_metadata.md_fid.fid_len = MAXFIDSZ;
992 	error = VOP_FID(cp->c_frontvp, &cp->c_metadata.md_fid, NULL);
993 	if (error) {
994 		/*
995 		 * If we get back ENOSPC then the fid we passed in was too
996 		 * small.  For now we don't do anything and map to EINVAL.
997 		 */
998 		if (error == ENOSPC) {
999 			error = EINVAL;
1000 		}
1001 		goto out;
1002 	}
1003 
1004 	dnlc_purge_vp(cp->c_frontvp);
1005 
1006 	cp->c_metadata.md_flags |= MD_FILE;
1007 	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
1008 
1009 out:
1010 	if (error) {
1011 		if (cp->c_frontvp) {
1012 			VN_RELE(cp->c_frontvp);
1013 			(void) VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
1014 			cp->c_frontvp = NULL;
1015 		}
1016 		if (ffrele)
1017 			filegrp_ffrele(fgp);
1018 		if (freefile)
1019 			cachefs_freefile(fgp->fg_fscp->fs_cache);
1020 		if (rlfree) {
1021 #ifdef CFSDEBUG
1022 			cachefs_rlent_verify(fgp->fg_fscp->fs_cache,
1023 			    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno);
1024 #endif /* CFSDEBUG */
1025 			cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
1026 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
1027 			cp->c_metadata.md_rlno = 0;
1028 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
1029 		}
1030 		cachefs_nocache(cp);
1031 	}
1032 	if (alloc)
1033 		cachefs_kmem_free(attrp, sizeof (struct vattr));
1034 #ifdef CFSDEBUG
1035 	CFS_DEBUG(CFSDEBUG_FRONT)
1036 		printf("c_createfrontfile: EXIT error = %d name %s\n", error,
1037 		    name);
1038 #endif
1039 	return (error);
1040 }
1041 
1042 /*
1043  * Releases resources associated with the front file.
1044  * Only call this routine if a ffhold has been done.
1045  * Its okay to call this routine if the front file does not exist.
1046  * Note: this routine is used even if there is no front file.
1047  */
1048 void
1049 cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
1050     filegrp_t *fgp)
1051 {
1052 	int error, enoent;
1053 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
1054 
1055 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
1056 
1057 	enoent = 0;
1058 	if (mdp->md_flags & MD_FILE) {
1059 		if (fgp->fg_dirvp == NULL) {
1060 			cmn_err(CE_WARN, "cachefs: remove error, run fsck\n");
1061 			return;
1062 		}
1063 		make_ascii_name(cidp, name);
1064 		error = VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
1065 		if (error == ENOENT)
1066 			enoent = 1;
1067 		if ((error) && (error != ENOENT)) {
1068 			cmn_err(CE_WARN, "UFS remove error %s %d, run fsck\n",
1069 			    name, error);
1070 		}
1071 		if (mdp->md_flags & MD_ACLDIR) {
1072 			(void) strcat(name, ".d");
1073 			error = VOP_RMDIR(fgp->fg_dirvp, name, fgp->fg_dirvp,
1074 			    kcred, NULL, 0);
1075 			if ((error) && (error != ENOENT)) {
1076 				cmn_err(CE_WARN, "frontfs rmdir error %s %d"
1077 				    "; run fsck\n", name, error);
1078 			}
1079 		}
1080 		mdp->md_flags &= ~(MD_FILE | MD_POPULATED | MD_ACL | MD_ACLDIR);
1081 		bzero(&mdp->md_allocinfo, mdp->md_allocents *
1082 		    sizeof (struct cachefs_allocmap));
1083 		cachefs_freefile(fgp->fg_fscp->fs_cache);
1084 	}
1085 
1086 	/*
1087 	 * Clear packed bit, fastsymlinks and special files
1088 	 * do not have a front file.
1089 	 */
1090 	mdp->md_flags &= ~MD_PACKED;
1091 
1092 	/* XXX either rename routine or move this to caller */
1093 	if (enoent == 0)
1094 		filegrp_ffrele(fgp);
1095 
1096 	if (mdp->md_frontblks) {
1097 		cachefs_freeblocks(fgp->fg_fscp->fs_cache, mdp->md_frontblks,
1098 		    mdp->md_rltype);
1099 		mdp->md_frontblks = 0;
1100 	}
1101 }
1102 
1103 /*
1104  * This is the interface to the rest of CFS. This takes a cnode, and returns
1105  * the frontvp (stuffs it in the cnode). This creates an attrcache slot and
1106  * and frontfile if necessary.
1107  */
1108 
1109 int
1110 cachefs_getfrontfile(cnode_t *cp)
1111 {
1112 	struct filegrp *fgp = cp->c_filegrp;
1113 	int error;
1114 	struct vattr va;
1115 
1116 #ifdef CFSDEBUG
1117 	CFS_DEBUG(CFSDEBUG_SUBR)
1118 		printf("c_getfrontfile: ENTER cp %p\n", (void *)cp);
1119 #endif
1120 
1121 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
1122 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1123 
1124 	/*
1125 	 * Now we check to see if there is a front file for this entry.
1126 	 * If there is, we get the vnode for it and stick it in the cnode.
1127 	 * Otherwise, we create a front file, get the vnode for it and stick
1128 	 * it in the cnode.
1129 	 */
1130 	if (cp->c_flags & CN_STALE) {
1131 		cp->c_flags |= CN_NOCACHE;
1132 		error = ESTALE;
1133 		goto out;
1134 	}
1135 
1136 	/*
1137 	 * If the cnode is being populated, and we're not the populating
1138 	 * thread, then block until the pop thread completes.  If we are the
1139 	 * pop thread, then we may come in here, but not to nuke the directory
1140 	 * cnode at a critical juncture.  If we return from a cv_wait and the
1141 	 * cnode is now stale, don't bother trying to get the front file.
1142 	 */
1143 	while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
1144 	    (cp->c_popthrp != curthread)) {
1145 		cv_wait(&cp->c_popcv, &cp->c_statelock);
1146 		if (cp->c_flags & CN_STALE) {
1147 			cp->c_flags |= CN_NOCACHE;
1148 			error = ESTALE;
1149 			goto out;
1150 		}
1151 	}
1152 
1153 	if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
1154 #ifdef CFSDEBUG
1155 		if (cp->c_frontvp != NULL)
1156 			CFS_DEBUG(CFSDEBUG_FRONT)
1157 				printf("c_getfrontfile: !MD_FILE and frontvp "
1158 				    "not null cp %p\n", (void *)cp);
1159 #endif
1160 		if (CTOV(cp)->v_type == VDIR)
1161 			ASSERT((cp->c_metadata.md_flags & MD_POPULATED) == 0);
1162 		error = cachefs_createfrontfile(cp, fgp);
1163 		if (error)
1164 			goto out;
1165 	} else {
1166 		/*
1167 		 * A front file exists, all we need to do is to grab the fid,
1168 		 * do a VFS_VGET() on the fid, stuff the vnode in the cnode,
1169 		 * and return.
1170 		 */
1171 		if (fgp->fg_dirvp == NULL) {
1172 			cmn_err(CE_WARN, "cachefs: gff0: corrupted file system"
1173 			    " run fsck\n");
1174 			cachefs_inval_object(cp);
1175 			cp->c_flags |= CN_NOCACHE;
1176 			error = ESTALE;
1177 			goto out;
1178 		}
1179 		error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
1180 		    &cp->c_metadata.md_fid);
1181 		if (error || (cp->c_frontvp == NULL)) {
1182 #ifdef CFSDEBUG
1183 			CFS_DEBUG(CFSDEBUG_FRONT)
1184 				printf("cachefs: "
1185 				    "gff1: front file system error %d\n",
1186 				    error);
1187 #endif /* CFSDEBUG */
1188 			cachefs_inval_object(cp);
1189 			cp->c_flags |= CN_NOCACHE;
1190 			error = ESTALE;
1191 			goto out;
1192 		}
1193 
1194 		/* don't need to check timestamps if need_front_sync is set */
1195 		if (cp->c_flags & CN_NEED_FRONT_SYNC) {
1196 			error = 0;
1197 			goto out;
1198 		}
1199 
1200 		/* don't need to check empty directories */
1201 		if (CTOV(cp)->v_type == VDIR &&
1202 		    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
1203 			error = 0;
1204 			goto out;
1205 		}
1206 
1207 		/* get modify time of the front file */
1208 		va.va_mask = AT_MTIME;
1209 		error = VOP_GETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
1210 		if (error) {
1211 			cmn_err(CE_WARN, "cachefs: gff2: front file"
1212 			    " system error %d", error);
1213 			cachefs_inval_object(cp);
1214 			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
1215 			goto out;
1216 		}
1217 
1218 		/* compare with modify time stored in metadata */
1219 		if (bcmp(&va.va_mtime, &cp->c_metadata.md_timestamp,
1220 		    sizeof (timestruc_t)) != 0) {
1221 #ifdef CFSDEBUG
1222 			CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_INVALIDATE) {
1223 				long sec, nsec;
1224 				sec = cp->c_metadata.md_timestamp.tv_sec;
1225 				nsec = cp->c_metadata.md_timestamp.tv_nsec;
1226 				printf("c_getfrontfile: timestamps don't"
1227 				    " match fileno %lld va %lx %lx"
1228 				    " meta %lx %lx\n",
1229 				    (u_longlong_t)cp->c_id.cid_fileno,
1230 				    va.va_mtime.tv_sec,
1231 				    va.va_mtime.tv_nsec, sec, nsec);
1232 			}
1233 #endif
1234 			cachefs_inval_object(cp);
1235 			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
1236 		}
1237 	}
1238 out:
1239 #ifdef CFSDEBUG
1240 	CFS_DEBUG(CFSDEBUG_FRONT)
1241 		printf("c_getfrontfile: EXIT error = %d\n", error);
1242 #endif
1243 	return (error);
1244 }
1245 
1246 void
1247 cachefs_inval_object(cnode_t *cp)
1248 {
1249 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
1250 	struct filegrp *fgp = cp->c_filegrp;
1251 	int error;
1252 
1253 	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
1254 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1255 	ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0 ||
1256 	    cp->c_popthrp == curthread);
1257 #if 0
1258 	CFS_DEBUG(CFSDEBUG_SUBR)
1259 		printf("c_inval_object: ENTER cp %p\n", (void *)cp);
1260 	if (cp->c_flags & (CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING))
1261 		debug_enter("inval object during async pop");
1262 #endif
1263 	cp->c_flags |= CN_NOCACHE;
1264 
1265 	/* if we cannot modify the cache */
1266 	if (C_TO_FSCACHE(cp)->fs_cache->c_flags &
1267 	    (CACHE_NOFILL | CACHE_NOCACHE)) {
1268 		goto out;
1269 	}
1270 
1271 	/* if there is a front file */
1272 	if (cp->c_metadata.md_flags & MD_FILE) {
1273 		if (fgp->fg_dirvp == NULL)
1274 			goto out;
1275 
1276 		/* get the front file vp if necessary */
1277 		if (cp->c_frontvp == NULL) {
1278 
1279 			error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
1280 			    &cp->c_metadata.md_fid);
1281 			if (error || (cp->c_frontvp == NULL)) {
1282 #ifdef CFSDEBUG
1283 				CFS_DEBUG(CFSDEBUG_FRONT)
1284 					printf("cachefs: "
1285 					    "io: front file error %d\n", error);
1286 #endif /* CFSDEBUG */
1287 				goto out;
1288 			}
1289 		}
1290 
1291 		/* truncate the file to zero size */
1292 		error = cachefs_frontfile_size(cp, 0);
1293 		if (error)
1294 			goto out;
1295 		cp->c_flags &= ~CN_NOCACHE;
1296 
1297 		/* if a directory, v_type is zero if called from initcnode */
1298 		if (cp->c_attr.va_type == VDIR) {
1299 			if (cp->c_usage < CFS_DIRCACHE_COST) {
1300 				cp->c_invals++;
1301 				if (cp->c_invals > CFS_DIRCACHE_INVAL) {
1302 					cp->c_invals = 0;
1303 				}
1304 			} else
1305 				cp->c_invals = 0;
1306 			cp->c_usage = 0;
1307 		}
1308 	} else {
1309 		cp->c_flags &= ~CN_NOCACHE;
1310 	}
1311 
1312 out:
1313 	if ((cp->c_metadata.md_flags & MD_PACKED) &&
1314 	    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
1315 	    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
1316 		ASSERT(cp->c_metadata.md_rlno != 0);
1317 		if (cp->c_metadata.md_rltype != CACHEFS_RL_PACKED_PENDING) {
1318 			cachefs_rlent_moveto(cachep,
1319 			    CACHEFS_RL_PACKED_PENDING,
1320 			    cp->c_metadata.md_rlno,
1321 			    cp->c_metadata.md_frontblks);
1322 			cp->c_metadata.md_rltype = CACHEFS_RL_PACKED_PENDING;
1323 			/* unconditionally set CN_UPDATED below */
1324 		}
1325 	}
1326 
1327 	cachefs_purgeacl(cp);
1328 
1329 	if (cp->c_flags & CN_ASYNC_POP_WORKING)
1330 		cp->c_flags |= CN_NOCACHE;
1331 	cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
1332 	    MD_FASTSYMLNK);
1333 	cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1334 	cp->c_flags |= CN_UPDATED;
1335 
1336 	/*
1337 	 * If the object invalidated is a directory, the dnlc should be purged
1338 	 * to elide all references to this (directory) vnode.
1339 	 */
1340 	if (CTOV(cp)->v_type == VDIR)
1341 		dnlc_purge_vp(CTOV(cp));
1342 
1343 #ifdef CFSDEBUG
1344 	CFS_DEBUG(CFSDEBUG_SUBR)
1345 		printf("c_inval_object: EXIT\n");
1346 #endif
1347 }
1348 
1349 void
1350 make_ascii_name(cfs_cid_t *cidp, char *strp)
1351 {
1352 	int i = sizeof (uint_t) * 4;
1353 	u_longlong_t index;
1354 	ino64_t name;
1355 
1356 	if (cidp->cid_flags & CFS_CID_LOCAL)
1357 		*strp++ = 'L';
1358 	name = (ino64_t)cidp->cid_fileno;
1359 	do {
1360 		index = (((u_longlong_t)name) & 0xf000000000000000) >> 60;
1361 		index &= (u_longlong_t)0xf;
1362 		ASSERT(index < (u_longlong_t)16);
1363 		*strp++ = "0123456789abcdef"[index];
1364 		name <<= 4;
1365 	} while (--i);
1366 	*strp = '\0';
1367 }
1368 
1369 void
1370 cachefs_nocache(cnode_t *cp)
1371 {
1372 	fscache_t *fscp = C_TO_FSCACHE(cp);
1373 	cachefscache_t *cachep = fscp->fs_cache;
1374 
1375 #ifdef CFSDEBUG
1376 	CFS_DEBUG(CFSDEBUG_SUBR)
1377 		printf("c_nocache: ENTER cp %p\n", (void *)cp);
1378 #endif
1379 
1380 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1381 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1382 	if ((cp->c_flags & CN_NOCACHE) == 0) {
1383 #ifdef CFSDEBUG
1384 		CFS_DEBUG(CFSDEBUG_INVALIDATE)
1385 			printf("cachefs_nocache: invalidating %llu\n",
1386 			    (u_longlong_t)cp->c_id.cid_fileno);
1387 #endif
1388 		/*
1389 		 * Here we are waiting until inactive time to do
1390 		 * the inval_object.  In case we don't get to inactive
1391 		 * (because of a crash, say) we set up a timestamp mismatch
1392 		 * such that getfrontfile will blow the front file away
1393 		 * next time we try to use it.
1394 		 */
1395 		cp->c_metadata.md_timestamp.tv_sec = 0;
1396 		cp->c_metadata.md_timestamp.tv_nsec = 0;
1397 		cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
1398 		    MD_FASTSYMLNK);
1399 		cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1400 
1401 		cachefs_purgeacl(cp);
1402 
1403 		/*
1404 		 * It is possible we can nocache while disconnected.
1405 		 * A directory could be nocached by running out of space.
1406 		 * A regular file should only be nocached if an I/O error
1407 		 * occurs to the front fs.
1408 		 * We count on the item staying on the modified list
1409 		 * so we do not loose the cid to fid mapping for directories.
1410 		 */
1411 
1412 		if ((cp->c_metadata.md_flags & MD_PACKED) &&
1413 		    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
1414 		    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
1415 			ASSERT(cp->c_metadata.md_rlno != 0);
1416 			if (cp->c_metadata.md_rltype !=
1417 			    CACHEFS_RL_PACKED_PENDING) {
1418 				cachefs_rlent_moveto(cachep,
1419 				    CACHEFS_RL_PACKED_PENDING,
1420 				    cp->c_metadata.md_rlno,
1421 				    cp->c_metadata.md_frontblks);
1422 				cp->c_metadata.md_rltype =
1423 				    CACHEFS_RL_PACKED_PENDING;
1424 				/* unconditionally set CN_UPDATED below */
1425 			}
1426 		}
1427 
1428 		if (CTOV(cp)->v_type == VDIR)
1429 			dnlc_purge_vp(CTOV(cp));
1430 		cp->c_flags |= (CN_NOCACHE | CN_UPDATED);
1431 	}
1432 
1433 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_NOCACHE))
1434 		cachefs_log_nocache(cachep, 0, fscp->fs_cfsvfsp,
1435 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno);
1436 
1437 #ifdef CFSDEBUG
1438 	CFS_DEBUG(CFSDEBUG_SUBR)
1439 		printf("c_nocache: EXIT cp %p\n", (void *)cp);
1440 #endif
1441 }
1442 
1443 /*
1444  * Checks to see if the page is in the disk cache, by checking the allocmap.
1445  */
1446 int
1447 cachefs_check_allocmap(cnode_t *cp, u_offset_t off)
1448 {
1449 	int i;
1450 	size_t dbl_size_to_look = cp->c_attr.va_size - off;
1451 	uint_t	size_to_look;
1452 
1453 	if (dbl_size_to_look > (u_offset_t)PAGESIZE)
1454 		size_to_look = (uint_t)PAGESIZE;
1455 	else
1456 		/*LINTED alignment okay*/
1457 		size_to_look = (uint_t)dbl_size_to_look;
1458 
1459 	for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1460 		struct cachefs_allocmap *allocp =
1461 		    cp->c_metadata.md_allocinfo + i;
1462 
1463 		if (off >= allocp->am_start_off) {
1464 			if ((off + size_to_look) <=
1465 			    (allocp->am_start_off + allocp->am_size)) {
1466 				struct fscache *fscp = C_TO_FSCACHE(cp);
1467 				cachefscache_t *cachep = fscp->fs_cache;
1468 
1469 				if (CACHEFS_LOG_LOGGING(cachep,
1470 				    CACHEFS_LOG_CALLOC))
1471 					cachefs_log_calloc(cachep, 0,
1472 					    fscp->fs_cfsvfsp,
1473 					    &cp->c_metadata.md_cookie,
1474 					    cp->c_id.cid_fileno,
1475 					    off, size_to_look);
1476 			/*
1477 			 * Found the page in the CFS disk cache.
1478 			 */
1479 				return (1);
1480 			}
1481 		} else {
1482 			return (0);
1483 		}
1484 	}
1485 	return (0);
1486 }
1487 
1488 /*
1489  * Merges adjacent allocmap entries together where possible, e.g.
1490  *   offset=0x0,     size=0x40000
1491  *   offset=0x40000, size=0x20000	becomes just offset=0x0, size-0x90000
1492  *   offset=0x60000, size=0x30000
1493  */
1494 
1495 
1496 void
1497 cachefs_coalesce_allocmap(struct cachefs_metadata *cmd)
1498 {
1499 	int i, reduced = 0;
1500 	struct cachefs_allocmap *allocp, *nallocp;
1501 
1502 	nallocp = allocp = cmd->md_allocinfo;
1503 	allocp++;
1504 	for (i = 1; i < cmd->md_allocents; i++, allocp++) {
1505 		if (nallocp->am_start_off + nallocp->am_size ==
1506 		    allocp->am_start_off) {
1507 			nallocp->am_size += allocp->am_size;
1508 			reduced++;
1509 		} else {
1510 			nallocp++;
1511 			nallocp->am_start_off = allocp->am_start_off;
1512 			nallocp->am_size = allocp->am_size;
1513 		}
1514 	}
1515 	cmd->md_allocents -= reduced;
1516 }
1517 
1518 /*
1519  * Updates the allocmap to reflect a new chunk of data that has been
1520  * populated.
1521  */
1522 void
1523 cachefs_update_allocmap(cnode_t *cp, u_offset_t off, size_t size)
1524 {
1525 	int i;
1526 	struct cachefs_allocmap *allocp;
1527 	struct fscache *fscp =  C_TO_FSCACHE(cp);
1528 	cachefscache_t *cachep = fscp->fs_cache;
1529 	u_offset_t saveoff;
1530 	u_offset_t savesize;
1531 	u_offset_t logoff = off;
1532 	size_t logsize = size;
1533 	u_offset_t endoff;
1534 	u_offset_t tmpendoff;
1535 
1536 	/*
1537 	 * We try to see if we can coalesce the current block into an existing
1538 	 * allocation and mark it as such.
1539 	 * If we can't do that then we make a new entry in the allocmap.
1540 	 * when we run out of allocmaps, put the cnode in NOCACHE mode.
1541 	 */
1542 again:
1543 	allocp = cp->c_metadata.md_allocinfo;
1544 	for (i = 0; i < cp->c_metadata.md_allocents; i++, allocp++) {
1545 
1546 		if (off <= (allocp->am_start_off)) {
1547 			endoff = off + size;
1548 			if (endoff >= allocp->am_start_off) {
1549 				tmpendoff = allocp->am_start_off +
1550 				    allocp->am_size;
1551 				if (endoff < tmpendoff)
1552 					endoff = tmpendoff;
1553 				allocp->am_size = endoff - off;
1554 				allocp->am_start_off = off;
1555 				cachefs_coalesce_allocmap(&cp->c_metadata);
1556 				allocp = cp->c_metadata.md_allocinfo;
1557 				if (allocp->am_size >= cp->c_size)
1558 					cp->c_metadata.md_flags |= MD_POPULATED;
1559 				return;
1560 			} else {
1561 				saveoff = off;
1562 				savesize = size;
1563 				off = allocp->am_start_off;
1564 				size = allocp->am_size;
1565 				allocp->am_size = savesize;
1566 				allocp->am_start_off = saveoff;
1567 				goto again;
1568 			}
1569 		} else {
1570 			endoff = allocp->am_start_off + allocp->am_size;
1571 			if (off < endoff) {
1572 				tmpendoff = off + size;
1573 				if (endoff < tmpendoff)
1574 					endoff = tmpendoff;
1575 				allocp->am_size = endoff - allocp->am_start_off;
1576 				cachefs_coalesce_allocmap(&cp->c_metadata);
1577 				allocp = cp->c_metadata.md_allocinfo;
1578 				if (allocp->am_size >= cp->c_size)
1579 					cp->c_metadata.md_flags |= MD_POPULATED;
1580 				return;
1581 			}
1582 			if (off == (allocp->am_start_off + allocp->am_size)) {
1583 				allocp->am_size += size;
1584 				cachefs_coalesce_allocmap(&cp->c_metadata);
1585 				allocp = cp->c_metadata.md_allocinfo;
1586 				if (allocp->am_size >= cp->c_size)
1587 					cp->c_metadata.md_flags |= MD_POPULATED;
1588 				return;
1589 			}
1590 		}
1591 	}
1592 	if (i == C_MAX_ALLOCINFO_SLOTS) {
1593 #ifdef CFSDEBUG
1594 		CFS_DEBUG(CFSDEBUG_ALLOCMAP)
1595 			printf("c_update_alloc_map: "
1596 			    "Too many allinfo entries cp %p fileno %llu %p\n",
1597 			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
1598 			    (void *)cp->c_metadata.md_allocinfo);
1599 #endif
1600 		cachefs_nocache(cp);
1601 		return;
1602 	}
1603 	allocp->am_start_off = off;
1604 	allocp->am_size = (u_offset_t)size;
1605 	if (allocp->am_size >= cp->c_size)
1606 		cp->c_metadata.md_flags |= MD_POPULATED;
1607 	cp->c_metadata.md_allocents++;
1608 
1609 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_UALLOC))
1610 		cachefs_log_ualloc(cachep, 0, fscp->fs_cfsvfsp,
1611 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
1612 		    logoff, logsize);
1613 }
1614 
1615 /*
1616  * CFS population function
1617  *
1618  * before async population, this function used to turn on the cnode
1619  * flags CN_UPDATED, CN_NEED_FRONT_SYNC, and CN_POPULATION_PENDING.
1620  * now, however, it's the responsibility of the caller to do this if
1621  * this function returns 0 (no error).
1622  */
1623 
1624 int
1625 cachefs_populate(cnode_t *cp, u_offset_t off, size_t popsize, vnode_t *frontvp,
1626     vnode_t *backvp, u_offset_t cpsize, cred_t *cr)
1627 {
1628 	int error = 0;
1629 	caddr_t addr;
1630 	u_offset_t upto;
1631 	uint_t size;
1632 	u_offset_t from = off;
1633 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
1634 	ssize_t resid;
1635 	struct fbuf *fbp;
1636 	caddr_t buf = kmem_alloc(MAXBSIZE, KM_SLEEP);
1637 
1638 #ifdef CFSDEBUG
1639 	CFS_DEBUG(CFSDEBUG_VOPS)
1640 		printf("cachefs_populate: ENTER cp %p off %lld\n",
1641 		    (void *)cp, off);
1642 #endif
1643 
1644 	upto = MIN((off + popsize), cpsize);
1645 
1646 	while (from < upto) {
1647 		u_offset_t blkoff = (from & (offset_t)MAXBMASK);
1648 		uint_t n = from - blkoff;
1649 
1650 		size = upto - from;
1651 		if (upto > (blkoff + MAXBSIZE))
1652 			size = MAXBSIZE - n;
1653 
1654 		error = fbread(backvp, (offset_t)blkoff, n + size,
1655 		    S_OTHER, &fbp);
1656 		if (CFS_TIMEOUT(C_TO_FSCACHE(cp), error))
1657 			goto out;
1658 		else if (error) {
1659 #ifdef CFSDEBUG
1660 			CFS_DEBUG(CFSDEBUG_BACK)
1661 				printf("cachefs_populate: fbread error %d\n",
1662 				    error);
1663 #endif
1664 			goto out;
1665 		}
1666 
1667 		addr = fbp->fb_addr;
1668 		ASSERT(addr != NULL);
1669 		ASSERT(n + size <= MAXBSIZE);
1670 		bcopy(addr, buf, n + size);
1671 		fbrelse(fbp, S_OTHER);
1672 
1673 		if (n == 0 || cachefs_check_allocmap(cp, blkoff) == 0) {
1674 			if (error = cachefs_allocblocks(cachep, 1,
1675 			    cp->c_metadata.md_rltype))
1676 				goto out;
1677 			cp->c_metadata.md_frontblks++;
1678 		}
1679 		resid = 0;
1680 		error = vn_rdwr(UIO_WRITE, frontvp, buf + n, size,
1681 		    (offset_t)from, UIO_SYSSPACE, 0,
1682 		    (rlim64_t)RLIM64_INFINITY, cr, &resid);
1683 		if (error) {
1684 #ifdef CFSDEBUG
1685 			CFS_DEBUG(CFSDEBUG_FRONT)
1686 				printf("cachefs_populate: "
1687 				    "Got error = %d from vn_rdwr\n", error);
1688 #endif
1689 			goto out;
1690 		}
1691 #ifdef CFSDEBUG
1692 		if (resid)
1693 			CFS_DEBUG(CFSDEBUG_FRONT)
1694 				printf("cachefs_populate: non-zero resid %ld\n",
1695 				    resid);
1696 #endif
1697 		from += size;
1698 	}
1699 	(void) cachefs_update_allocmap(cp, off, upto - off);
1700 out:
1701 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_POPULATE))
1702 		cachefs_log_populate(cachep, error,
1703 		    C_TO_FSCACHE(cp)->fs_cfsvfsp,
1704 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, off,
1705 		    popsize);
1706 
1707 #ifdef CFSDEBUG
1708 	CFS_DEBUG(CFSDEBUG_VOPS)
1709 		printf("cachefs_populate: EXIT cp %p error %d\n",
1710 		    (void *)cp, error);
1711 #endif
1712 	kmem_free(buf, MAXBSIZE);
1713 
1714 	return (error);
1715 }
1716 
1717 /*
1718  * due to compiler error we shifted cnode to the last argument slot.
1719  * occurred during large files project - XXX.
1720  */
1721 void
1722 cachefs_cluster_allocmap(u_offset_t off, u_offset_t *popoffp, size_t *popsizep,
1723     size_t size, struct cnode *cp)
1724 {
1725 	int i;
1726 	u_offset_t lastoff = 0;
1727 	u_offset_t forward_diff = 0;
1728 	u_offset_t backward_diff = 0;
1729 
1730 	ASSERT(size <= C_TO_FSCACHE(cp)->fs_info.fi_popsize);
1731 
1732 #ifdef CFSDEBUG
1733 	CFS_DEBUG(CFSDEBUG_SUBR)
1734 		printf("cachefs_cluster_allocmap: off %llx, size %llx, "
1735 		    "c_size %llx\n", off, size, (longlong_t)cp->c_size);
1736 #endif /* CFSDEBUG */
1737 	for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1738 		struct cachefs_allocmap *allocp =
1739 		    cp->c_metadata.md_allocinfo + i;
1740 
1741 		if (allocp->am_start_off > off) {
1742 			if ((off + size) > allocp->am_start_off) {
1743 				forward_diff = allocp->am_start_off - off;
1744 				backward_diff = size - forward_diff;
1745 				if (backward_diff > off)
1746 					backward_diff = off;
1747 				if (lastoff > (off - backward_diff))
1748 					backward_diff = off - lastoff;
1749 			} else {
1750 				forward_diff = size;
1751 			}
1752 			*popoffp = (off - backward_diff) & (offset_t)PAGEMASK;
1753 			*popsizep = ((off + forward_diff) - *popoffp) &
1754 			    (offset_t)PAGEMASK;
1755 			return;
1756 		} else {
1757 			lastoff = allocp->am_start_off + allocp->am_size;
1758 		}
1759 	}
1760 	if ((lastoff + size) > off) {
1761 		*popoffp = (lastoff & (offset_t)PAGEMASK);
1762 	} else {
1763 		*popoffp = off & (offset_t)PAGEMASK;
1764 	}
1765 
1766 	/*
1767 	 * 64bit project: popsize is the chunk size used to populate the
1768 	 * cache (default 64K). As such, 32 bit should suffice.
1769 	 */
1770 	if ((*popoffp + size) > cp->c_size)
1771 		*popsizep = (cp->c_size - *popoffp + PAGEOFFSET) &
1772 		    (offset_t)PAGEMASK;
1773 	else if (size < PAGESIZE)
1774 		*popsizep = (size + PAGEOFFSET) & (offset_t)PAGEMASK;
1775 	else
1776 		*popsizep = size & (offset_t)PAGEMASK;
1777 
1778 #ifdef CFSDEBUG
1779 	CFS_DEBUG(CFSDEBUG_SUBR)
1780 		printf("cachefs_cluster_allocmap: popoff %llx, popsize %llx\n",
1781 		    (u_longlong_t)(*popoffp), (u_longlong_t)(*popsizep));
1782 #endif /* CFSDEBUG */
1783 }
1784 
1785 /*
1786  * "populate" a symlink in the cache
1787  */
1788 int
1789 cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen)
1790 {
1791 	int error = 0;
1792 	struct fscache *fscp = C_TO_FSCACHE(cp);
1793 	cachefscache_t *cachep = fscp->fs_cache;
1794 	struct cachefs_metadata *mdp = &cp->c_metadata;
1795 
1796 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
1797 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1798 
1799 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
1800 		goto out;
1801 
1802 	if (cp->c_flags & CN_NOCACHE)
1803 		return (ENOENT);
1804 
1805 	cp->c_size = (u_offset_t)buflen;
1806 
1807 	/* if can create a fast sym link */
1808 	if (buflen <= C_FSL_SIZE) {
1809 		/* give up the front file resources */
1810 		if (mdp->md_rlno) {
1811 			cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
1812 			cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
1813 			    mdp->md_rlno, 0);
1814 			mdp->md_rlno = 0;
1815 			mdp->md_rltype = CACHEFS_RL_NONE;
1816 		}
1817 		/* put sym link contents in allocinfo in metadata */
1818 		bzero(mdp->md_allocinfo, C_FSL_SIZE);
1819 		bcopy(buf, mdp->md_allocinfo, buflen);
1820 
1821 		mdp->md_flags |= MD_FASTSYMLNK;
1822 		cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1823 		cp->c_flags |= CN_UPDATED;
1824 		goto out;
1825 	}
1826 
1827 	/* else create a sym link in a front file */
1828 	if (cp->c_frontvp == NULL)
1829 		error = cachefs_getfrontfile(cp);
1830 	if (error)
1831 		goto out;
1832 
1833 	/* truncate front file */
1834 	error = cachefs_frontfile_size(cp, 0);
1835 	mdp->md_flags &= ~(MD_FASTSYMLNK | MD_POPULATED);
1836 	if (error)
1837 		goto out;
1838 
1839 	/* get space for the sym link */
1840 	error = cachefs_allocblocks(cachep, 1, cp->c_metadata.md_rltype);
1841 	if (error)
1842 		goto out;
1843 
1844 	/* write the sym link to the front file */
1845 	error = vn_rdwr(UIO_WRITE, cp->c_frontvp, buf, buflen, 0,
1846 	    UIO_SYSSPACE, 0, RLIM_INFINITY, kcred, NULL);
1847 	if (error) {
1848 		cachefs_freeblocks(cachep, 1, cp->c_metadata.md_rltype);
1849 		goto out;
1850 	}
1851 
1852 	cp->c_metadata.md_flags |= MD_POPULATED;
1853 	cp->c_flags |= CN_NEED_FRONT_SYNC;
1854 	cp->c_flags |= CN_UPDATED;
1855 
1856 out:
1857 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CSYMLINK))
1858 		cachefs_log_csymlink(cachep, error, fscp->fs_cfsvfsp,
1859 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, buflen);
1860 
1861 	return (error);
1862 }
1863 
1864 /*
1865  * Reads the full contents of the symbolic link from the back file system.
1866  * *bufp is set to a MAXPATHLEN buffer that must be freed when done
1867  * *buflenp is the length of the link
1868  */
1869 int
1870 cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp)
1871 {
1872 	int error;
1873 	struct uio uio;
1874 	struct iovec iov;
1875 	caddr_t buf;
1876 	fscache_t *fscp = C_TO_FSCACHE(cp);
1877 
1878 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1879 
1880 	*bufp = NULL;
1881 
1882 	/* get back vnode */
1883 	if (cp->c_backvp == NULL) {
1884 		error = cachefs_getbackvp(fscp, cp);
1885 		if (error)
1886 			return (error);
1887 	}
1888 
1889 	/* set up for the readlink */
1890 	bzero(&uio, sizeof (struct uio));
1891 	bzero(&iov, sizeof (struct iovec));
1892 	buf = cachefs_kmem_alloc(MAXPATHLEN, KM_SLEEP);
1893 	iov.iov_base = buf;
1894 	iov.iov_len = MAXPATHLEN;
1895 	uio.uio_iov = &iov;
1896 	uio.uio_iovcnt = 1;
1897 	uio.uio_resid = MAXPATHLEN;
1898 	uio.uio_segflg = UIO_SYSSPACE;
1899 	uio.uio_loffset = 0;
1900 	uio.uio_fmode = 0;
1901 	uio.uio_extflg = UIO_COPY_CACHED;
1902 	uio.uio_llimit = MAXOFFSET_T;
1903 
1904 	/* get the link data */
1905 	CFS_DPRINT_BACKFS_NFSV4(fscp,
1906 	    ("cachefs_readlink (nfsv4): cnode %p, backvp %p\n",
1907 	    cp, cp->c_backvp));
1908 	error = VOP_READLINK(cp->c_backvp, &uio, cr, NULL);
1909 	if (error) {
1910 		cachefs_kmem_free(buf, MAXPATHLEN);
1911 	} else {
1912 		*bufp = buf;
1913 		/*LINTED alignment okay*/
1914 		*buflenp = MAXPATHLEN - (int)uio.uio_resid;
1915 	}
1916 
1917 	return (error);
1918 }
1919 
1920 int
1921 cachefs_getbackvp(struct fscache *fscp, struct cnode *cp)
1922 {
1923 	int error = 0;
1924 	int flag;
1925 
1926 #ifdef CFSDEBUG
1927 	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
1928 		printf("cachefs_getbackvp: ENTER fscp %p cp %p\n",
1929 		    (void *)fscp, (void *)cp);
1930 #endif
1931 	ASSERT(cp != NULL);
1932 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1933 	ASSERT(cp->c_backvp == NULL);
1934 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1935 
1936 	/*
1937 	 * If destroy is set then the last link to a file has been
1938 	 * removed.  Oddly enough NFS will still return a vnode
1939 	 * for the file if the timeout has not expired.
1940 	 * This causes headaches for cachefs_push because the
1941 	 * vnode is really stale.
1942 	 * So we just short circuit the problem here.
1943 	 */
1944 	if (cp->c_flags & CN_DESTROY)
1945 		return (ESTALE);
1946 
1947 	ASSERT(fscp->fs_backvfsp);
1948 	if (fscp->fs_backvfsp == NULL)
1949 		return (ETIMEDOUT);
1950 	error = VFS_VGET(fscp->fs_backvfsp, &cp->c_backvp,
1951 	    (struct fid *)&cp->c_cookie);
1952 	if (cp->c_backvp && cp->c_cred &&
1953 	    ((cp->c_flags & CN_NEEDOPEN) || (cp->c_attr.va_type == VREG))) {
1954 		/*
1955 		 * XXX bob: really should pass in the correct flag,
1956 		 * fortunately nobody pays attention to it
1957 		 */
1958 		flag = 0;
1959 		/*
1960 		 * If NEEDOOPEN is set, then this file was opened VOP_OPEN'd
1961 		 * but the backvp was not.  So, for the sake of the vnode
1962 		 * open counts used by delegation, we need to OPEN the backvp
1963 		 * with the same flags that were used for this cnode.  That way
1964 		 * when the file is VOP_CLOSE'd the counts won't go negative.
1965 		 */
1966 		if (cp->c_flags & CN_NEEDOPEN) {
1967 			cp->c_flags &= ~CN_NEEDOPEN;
1968 			if (cp->c_rdcnt > 0) {
1969 				cp->c_rdcnt--;
1970 				flag |= FREAD;
1971 			}
1972 			if (cp->c_wrcnt > 0) {
1973 				cp->c_wrcnt--;
1974 				flag |= FWRITE;
1975 			}
1976 		}
1977 		error = VOP_OPEN(&cp->c_backvp, flag, cp->c_cred, NULL);
1978 		if (error) {
1979 			VN_RELE(cp->c_backvp);
1980 			cp->c_backvp = NULL;
1981 		}
1982 	}
1983 
1984 #ifdef CFSDEBUG
1985 	CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_BACK) {
1986 		if (error || cp->c_backvp == NULL) {
1987 			printf("Stale cookie cp %p fileno %llu type %d \n",
1988 			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
1989 			    CTOV(cp)->v_type);
1990 		}
1991 	}
1992 #endif
1993 
1994 #ifdef CFSDEBUG
1995 	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
1996 		printf("cachefs_getbackvp: EXIT error = %d\n", error);
1997 #endif
1998 	return (error);
1999 }
2000 
2001 int
2002 cachefs_getcookie(
2003 	vnode_t *vp,
2004 	struct fid *cookiep,
2005 	struct vattr *attrp,
2006 	cred_t *cr,
2007 	uint32_t valid_fid)
2008 {
2009 	int error = 0;
2010 
2011 #ifdef CFSDEBUG
2012 	CFS_DEBUG(CFSDEBUG_CHEAT)
2013 		printf("cachefs_getcookie: ENTER vp %p\n", (void *)vp);
2014 #endif
2015 	/*
2016 	 * Get the FID only if the caller has indicated it is valid,
2017 	 * otherwise, zero the cookie.
2018 	 */
2019 	if (valid_fid) {
2020 		/*
2021 		 * This assumes that the cookie is a full size fid, if we go to
2022 		 * variable length fids we will need to change this.
2023 		 */
2024 		cookiep->fid_len = MAXFIDSZ;
2025 		error = VOP_FID(vp, cookiep, NULL);
2026 	} else {
2027 		bzero(cookiep, sizeof (*cookiep));
2028 	}
2029 
2030 	if (!error) {
2031 		if (attrp) {
2032 			ASSERT(attrp != NULL);
2033 			attrp->va_mask = AT_ALL;
2034 			error = VOP_GETATTR(vp, attrp, 0, cr, NULL);
2035 		}
2036 	} else {
2037 		if (error == ENOSPC) {
2038 			/*
2039 			 * This is an indication that the underlying filesystem
2040 			 * needs a bigger fid.  For now just map to EINVAL.
2041 			 */
2042 			error = EINVAL;
2043 		}
2044 	}
2045 #ifdef CFSDEBUG
2046 	CFS_DEBUG(CFSDEBUG_CHEAT)
2047 		printf("cachefs_getcookie: EXIT error = %d\n", error);
2048 #endif
2049 	return (error);
2050 }
2051 
2052 void
2053 cachefs_workq_init(struct cachefs_workq *qp)
2054 {
2055 	qp->wq_head = qp->wq_tail = NULL;
2056 	qp->wq_length =
2057 	    qp->wq_thread_count =
2058 	    qp->wq_max_len =
2059 	    qp->wq_halt_request = 0;
2060 	qp->wq_keepone = 0;
2061 	cv_init(&qp->wq_req_cv, NULL, CV_DEFAULT, NULL);
2062 	cv_init(&qp->wq_halt_cv, NULL, CV_DEFAULT, NULL);
2063 	mutex_init(&qp->wq_queue_lock, NULL, MUTEX_DEFAULT, NULL);
2064 }
2065 
2066 /*
2067  * return non-zero if it's `okay' to queue more requests (policy)
2068  */
2069 
2070 static int cachefs_async_max = 512;
2071 static int cachefs_async_count = 0;
2072 kmutex_t cachefs_async_lock;
2073 
2074 int
2075 cachefs_async_okay(void)
2076 {
2077 	/*
2078 	 * a value of -1 for max means to ignore freemem
2079 	 */
2080 
2081 	if (cachefs_async_max == -1)
2082 		return (1);
2083 
2084 	if (freemem < minfree)
2085 		return (0);
2086 
2087 	/*
2088 	 * a value of 0 for max means no arbitrary limit (only `freemen')
2089 	 */
2090 
2091 	if (cachefs_async_max == 0)
2092 		return (1);
2093 
2094 	ASSERT(cachefs_async_max > 0);
2095 
2096 	/*
2097 	 * check the global count against the max.
2098 	 *
2099 	 * we don't need to grab cachefs_async_lock -- we're just
2100 	 * looking, and a little bit of `fuzz' is okay.
2101 	 */
2102 
2103 	if (cachefs_async_count >= cachefs_async_max)
2104 		return (0);
2105 
2106 	return (1);
2107 }
2108 
2109 void
2110 cachefs_async_start(struct cachefs_workq *qp)
2111 {
2112 	struct cachefs_req *rp;
2113 	int left;
2114 	callb_cpr_t cprinfo;
2115 
2116 	CALLB_CPR_INIT(&cprinfo, &qp->wq_queue_lock, callb_generic_cpr, "cas");
2117 	mutex_enter(&qp->wq_queue_lock);
2118 	left = 1;
2119 	for (;;) {
2120 		/* if there are no pending requests */
2121 		if ((qp->wq_head == NULL) && (qp->wq_logwork == 0)) {
2122 			/* see if thread should exit */
2123 			if (qp->wq_halt_request || (left == -1)) {
2124 				if ((qp->wq_thread_count > 1) ||
2125 				    (qp->wq_keepone == 0))
2126 					break;
2127 			}
2128 
2129 			/* wake up thread in async_halt if necessary */
2130 			if (qp->wq_halt_request)
2131 				cv_broadcast(&qp->wq_halt_cv);
2132 
2133 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2134 			/* sleep until there is something to do */
2135 			left = cv_reltimedwait(&qp->wq_req_cv,
2136 			    &qp->wq_queue_lock, CFS_ASYNC_TIMEOUT,
2137 			    TR_CLOCK_TICK);
2138 			CALLB_CPR_SAFE_END(&cprinfo, &qp->wq_queue_lock);
2139 			if ((qp->wq_head == NULL) && (qp->wq_logwork == 0))
2140 				continue;
2141 		}
2142 		left = 1;
2143 
2144 		if (qp->wq_logwork) {
2145 			qp->wq_logwork = 0;
2146 			mutex_exit(&qp->wq_queue_lock);
2147 			cachefs_log_process_queue(qp->wq_cachep, 1);
2148 			mutex_enter(&qp->wq_queue_lock);
2149 			continue;
2150 		}
2151 
2152 		/* remove request from the list */
2153 		rp = qp->wq_head;
2154 		qp->wq_head = rp->cfs_next;
2155 		if (rp->cfs_next == NULL)
2156 			qp->wq_tail = NULL;
2157 
2158 		/* do the request */
2159 		mutex_exit(&qp->wq_queue_lock);
2160 		cachefs_do_req(rp);
2161 		mutex_enter(&qp->wq_queue_lock);
2162 
2163 		/* decrement count of requests */
2164 		qp->wq_length--;
2165 		mutex_enter(&cachefs_async_lock);
2166 		--cachefs_async_count;
2167 		mutex_exit(&cachefs_async_lock);
2168 	}
2169 	ASSERT(qp->wq_head == NULL);
2170 	qp->wq_thread_count--;
2171 	if (qp->wq_halt_request && qp->wq_thread_count == 0)
2172 		cv_broadcast(&qp->wq_halt_cv);
2173 	CALLB_CPR_EXIT(&cprinfo);
2174 	thread_exit();
2175 	/*NOTREACHED*/
2176 }
2177 
2178 /*
2179  * attempt to halt all the async threads associated with a given workq
2180  */
2181 int
2182 cachefs_async_halt(struct cachefs_workq *qp, int force)
2183 {
2184 	int error = 0;
2185 
2186 	mutex_enter(&qp->wq_queue_lock);
2187 	if (force)
2188 		qp->wq_keepone = 0;
2189 
2190 	if (qp->wq_thread_count > 0) {
2191 		qp->wq_halt_request++;
2192 		cv_broadcast(&qp->wq_req_cv);
2193 		(void) cv_reltimedwait(&qp->wq_halt_cv,
2194 		    &qp->wq_queue_lock, (60 * hz), TR_CLOCK_TICK);
2195 		qp->wq_halt_request--;
2196 		if (qp->wq_thread_count > 0) {
2197 			if ((qp->wq_thread_count == 1) &&
2198 			    (qp->wq_length == 0) && qp->wq_keepone)
2199 				error = EAGAIN;
2200 			else
2201 				error = EBUSY;
2202 		} else {
2203 			ASSERT(qp->wq_length == 0 && qp->wq_head == NULL);
2204 		}
2205 	}
2206 	mutex_exit(&qp->wq_queue_lock);
2207 	return (error);
2208 }
2209 
2210 void
2211 cachefs_addqueue(struct cachefs_req *rp, struct cachefs_workq *qp)
2212 {
2213 	mutex_enter(&qp->wq_queue_lock);
2214 	if (qp->wq_thread_count < cachefs_max_threads) {
2215 		if (qp->wq_thread_count == 0 ||
2216 		    (qp->wq_length >= (qp->wq_thread_count * 2))) {
2217 			(void) thread_create(NULL, 0, cachefs_async_start,
2218 			    qp, 0, &p0, TS_RUN, minclsyspri);
2219 			qp->wq_thread_count++;
2220 		}
2221 	}
2222 	mutex_enter(&rp->cfs_req_lock);
2223 	if (qp->wq_tail)
2224 		qp->wq_tail->cfs_next = rp;
2225 	else
2226 		qp->wq_head = rp;
2227 	qp->wq_tail = rp;
2228 	rp->cfs_next = NULL;
2229 	qp->wq_length++;
2230 	if (qp->wq_length > qp->wq_max_len)
2231 		qp->wq_max_len = qp->wq_length;
2232 	mutex_enter(&cachefs_async_lock);
2233 	++cachefs_async_count;
2234 	mutex_exit(&cachefs_async_lock);
2235 
2236 	cv_signal(&qp->wq_req_cv);
2237 	mutex_exit(&rp->cfs_req_lock);
2238 	mutex_exit(&qp->wq_queue_lock);
2239 }
2240 
2241 void
2242 cachefs_async_putpage(struct cachefs_putpage_req *prp, cred_t *cr)
2243 {
2244 	struct cnode *cp = VTOC(prp->cp_vp);
2245 
2246 	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
2247 
2248 	(void) VOP_PUTPAGE(prp->cp_vp, prp->cp_off, prp->cp_len,
2249 	    prp->cp_flags, cr, NULL);
2250 
2251 	mutex_enter(&cp->c_iomutex);
2252 	if (--cp->c_nio == 0)
2253 		cv_broadcast(&cp->c_iocv);
2254 	if (prp->cp_off == 0 && prp->cp_len == 0 &&
2255 	    (cp->c_ioflags & CIO_PUTPAGES)) {
2256 		cp->c_ioflags &= ~CIO_PUTPAGES;
2257 	}
2258 	mutex_exit(&cp->c_iomutex);
2259 }
2260 
/*
 * Service a queued populate request (called from cachefs_do_req() for
 * CFS_POPULATE): make sure the cnode has a front file and a back vnode,
 * populate the front file from the back file, sync it, and mark the
 * cnode's metadata MD_POPULATED.
 *
 * Nothing is returned; `error' is only used internally.  Any non-zero
 * error on the common exit path results in cachefs_nocache(cp).
 *
 * Locking: after the early checks, c_rwlock is held as writer until the
 * common exit.  c_statelock is dropped and retaken several times; the
 * local `havelock' records whether it is currently held so that the
 * `out' path can re-acquire it if needed.
 *
 * The two early returns (file group not writable / not connected, and
 * cachefs_cd_access() failure) only clear CN_ASYNC_POPULATE; they do not
 * go through `out', so they neither broadcast c_popcv nor call
 * cachefs_cd_release().
 */
void
cachefs_async_populate(struct cachefs_populate_req *pop, cred_t *cr)
{
	struct cnode *cp = VTOC(pop->cpop_vp);
	struct fscache *fscp = C_TO_FSCACHE(cp);
	struct filegrp *fgp = cp->c_filegrp;
	int error = 0; /* not returned -- used as a place-holder */
	vnode_t *frontvp = NULL, *backvp = NULL;
	int havelock = 0;
	vattr_t va;

	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);

	/* give up right away if the file group is not writable or we are not connected */
	if (((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0) ||
	    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
		mutex_enter(&cp->c_statelock);
		cp->c_flags &= ~CN_ASYNC_POPULATE;
		mutex_exit(&cp->c_statelock);
		return; /* goto out */
	}

	/* paired with the cachefs_cd_release() on the common exit path */
	error = cachefs_cd_access(fscp, 0, 0);
	if (error) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
			printf("async_pop: cd_access: err %d con %d\n",
			    error, fscp->fs_cdconnected);
#endif /* CFSDEBUG */
		mutex_enter(&cp->c_statelock);
		cp->c_flags &= ~CN_ASYNC_POPULATE;
		mutex_exit(&cp->c_statelock);
		return; /* goto out */
	}

	/*
	 * grab the statelock for some minimal things
	 */

	rw_enter(&cp->c_rwlock, RW_WRITER);
	mutex_enter(&cp->c_statelock);
	havelock = 1;

	/* the request was cancelled (flag cleared) before we got to it */
	if ((cp->c_flags & CN_ASYNC_POPULATE) == 0)
		goto out;

	/* there can be only one */
	ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0);
	cp->c_flags |= CN_ASYNC_POP_WORKING;
	cp->c_popthrp = curthread;

	/* already populated, nothing left to do */
	if (cp->c_metadata.md_flags & MD_POPULATED)
		goto out;

	if (cp->c_flags & CN_NOCACHE) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
			printf("cachefs_async_populate: nocache bit on\n");
#endif /* CFSDEBUG */
		error = EINVAL;
		goto out;
	}

	if (cp->c_frontvp == NULL) {
		/*
		 * NOTE(review): MD_FILE clear is taken to mean that no front
		 * file exists yet (inferred from the comment below) --
		 * confirm against the metadata definitions.
		 */
		if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
			/* copy the id while the statelock is still held */
			struct cfs_cid cid = cp->c_id;

			mutex_exit(&cp->c_statelock);
			havelock = 0;

			/*
			 * if frontfile doesn't exist, drop the lock
			 * to do some of the file creation stuff.
			 */

			if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
				error = filegrp_allocattr(fgp);
				if (error != 0)
					goto out;
			}
			if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
				mutex_enter(&fgp->fg_mutex);
				/* re-check the flag now that fg_mutex is held */
				if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
					if (fgp->fg_header->ach_nffs == 0)
						error = filegrpdir_create(fgp);
					else
						error = filegrpdir_find(fgp);
					if (error != 0) {
						mutex_exit(&fgp->fg_mutex);
						goto out;
					}
				}
				mutex_exit(&fgp->fg_mutex);
			}

			if (fgp->fg_dirvp != NULL) {
				char name[CFS_FRONTFILE_NAME_SIZE];
				struct vattr *attrp;

				attrp = cachefs_kmem_zalloc(
				    sizeof (struct vattr), KM_SLEEP);
				attrp->va_mode = S_IFREG | 0666;
				attrp->va_uid = 0;
				attrp->va_gid = 0;
				attrp->va_type = VREG;
				attrp->va_size = 0;
				attrp->va_mask =
				    AT_SIZE | AT_TYPE | AT_MODE |
				    AT_UID | AT_GID;

				make_ascii_name(&cid, name);

				/*
				 * The result is ignored on purpose; whether a
				 * front file is usable is decided by
				 * cachefs_getfrontfile() below.
				 */
				(void) VOP_CREATE(fgp->fg_dirvp, name, attrp,
				    EXCL, 0666, &frontvp, kcred, 0, NULL, NULL);

				cachefs_kmem_free(attrp,
				    sizeof (struct vattr));
			}

			mutex_enter(&cp->c_statelock);
			havelock = 1;
		}
		error = cachefs_getfrontfile(cp);
		ASSERT((error != 0) ||
		    (frontvp == NULL) ||
		    (frontvp == cp->c_frontvp));
	}
	if ((error != 0) || (cp->c_frontvp == NULL))
		goto out;

	/* drop the hold from VOP_CREATE, if any, and take our own on c_frontvp */
	if (frontvp != NULL)
		VN_RELE(frontvp);

	frontvp = cp->c_frontvp;
	VN_HOLD(frontvp);

	if (cp->c_backvp == NULL) {
		error = cachefs_getbackvp(fscp, cp);
		if ((error != 0) || (cp->c_backvp == NULL))
			goto out;
	}
	backvp = cp->c_backvp;
	VN_HOLD(backvp);

	/*
	 * Regular files are populated with the statelock dropped,
	 * directories with it held; either way the statelock is not held
	 * after a VREG or VDIR case.  The default case leaves it held.
	 */
	switch (pop->cpop_vp->v_type) {
	case VREG:
		mutex_exit(&cp->c_statelock);
		havelock = 0;
		error = cachefs_async_populate_reg(pop, cr, backvp, frontvp);
		break;
	case VDIR:
		error = cachefs_async_populate_dir(pop, cr, backvp, frontvp);
		mutex_exit(&cp->c_statelock);
		havelock = 0;
		break;
	default:
#ifdef CFSDEBUG
		printf("cachefs_async_populate: warning: vnode type = %d\n",
		    pop->cpop_vp->v_type);
		ASSERT(0);
#endif /* CFSDEBUG */
		error = EINVAL;
		break;
	}

	if (error != 0)
		goto out;

	/* push the freshly written front file data out (statelock not held) */
	error = VOP_FSYNC(frontvp, FSYNC, cr, NULL);
	if (error != 0) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
			printf("cachefs_async_populate: fsync\n");
#endif /* CFSDEBUG */
		goto out;
	}

	/* grab the lock and finish up */
	mutex_enter(&cp->c_statelock);
	havelock = 1;

	/* if went nocache while lock was dropped, get out */
	if ((cp->c_flags & CN_NOCACHE) || (cp->c_frontvp == NULL)) {
		error = EINVAL;
		goto out;
	}

	/* record the front file's mtime as the metadata timestamp */
	va.va_mask = AT_MTIME;
	error = VOP_GETATTR(cp->c_frontvp, &va, 0, cr, NULL);
	if (error) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
			printf("cachefs_async_populate: getattr\n");
#endif /* CFSDEBUG */
		goto out;
	}
	cp->c_metadata.md_timestamp = va.va_mtime;
	cp->c_metadata.md_flags |= MD_POPULATED;
	cp->c_metadata.md_flags &= ~MD_INVALREADDIR;
	cp->c_flags |= CN_UPDATED;

out:
	/*
	 * Common exit.  c_rwlock is held here; the statelock may or may not
	 * be, depending on where we came from.
	 */
	if (! havelock)
		mutex_enter(&cp->c_statelock);

	/* see if an error happened behind our backs */
	if ((error == 0) && (cp->c_flags & CN_NOCACHE)) {
#ifdef CFSDEBUG
		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
			printf("cachefs_async_populate: "
			    "nocache behind our backs\n");
#endif /* CFSDEBUG */
		error = EINVAL;
	}

	/* the populate attempt is over, whatever its outcome */
	cp->c_flags &= ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING |
	    CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING);
	cp->c_popthrp = NULL;

	if (error != 0)
		cachefs_nocache(cp);

	/* unblock any threads waiting for populate to finish */
	cv_broadcast(&cp->c_popcv);
	mutex_exit(&cp->c_statelock);
	rw_exit(&cp->c_rwlock);
	cachefs_cd_release(fscp);

	/* drop whichever vnode holds were taken above */
	if (backvp != NULL) {
		VN_RELE(backvp);
	}
	if (frontvp != NULL) {
		VN_RELE(frontvp);
	}
}
2495 
2496 /*
2497  * only to be called from cachefs_async_populate
2498  */
2499 
2500 static int
2501 cachefs_async_populate_reg(struct cachefs_populate_req *pop, cred_t *cr,
2502     vnode_t *backvp, vnode_t *frontvp)
2503 {
2504 	struct cnode *cp = VTOC(pop->cpop_vp);
2505 	int error = 0;
2506 	u_offset_t popoff;
2507 	size_t popsize;
2508 
2509 	cachefs_cluster_allocmap(pop->cpop_off, &popoff,
2510 	    &popsize, pop->cpop_size, cp);
2511 	if (popsize == 0) {
2512 #ifdef CFSDEBUG
2513 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2514 			printf("cachefs_async_populate: popsize == 0\n");
2515 #endif /* CFSDEBUG */
2516 		goto out;
2517 	}
2518 
2519 	error = cachefs_populate(cp, popoff, popsize, frontvp, backvp,
2520 	    cp->c_size, cr);
2521 	if (error != 0) {
2522 #ifdef CFSDEBUG
2523 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2524 			printf("cachefs_async_populate: cachefs_populate\n");
2525 #endif /* CFSDEBUG */
2526 		goto out;
2527 	}
2528 
2529 out:
2530 	return (error);
2531 }
2532 
2533 void
2534 cachefs_do_req(struct cachefs_req *rp)
2535 {
2536 	struct cachefscache *cachep;
2537 
2538 	mutex_enter(&rp->cfs_req_lock);
2539 	switch (rp->cfs_cmd) {
2540 	case CFS_INVALID:
2541 		panic("cachefs_do_req: CFS_INVALID operation on queue");
2542 		/*NOTREACHED*/
2543 	case CFS_CACHE_SYNC:
2544 		cachep = rp->cfs_req_u.cu_fs_sync.cf_cachep;
2545 		cachefs_cache_sync(cachep);
2546 		break;
2547 	case CFS_IDLE:
2548 		cachefs_cnode_idle(rp->cfs_req_u.cu_idle.ci_vp, rp->cfs_cr);
2549 		break;
2550 	case CFS_PUTPAGE:
2551 		cachefs_async_putpage(&rp->cfs_req_u.cu_putpage, rp->cfs_cr);
2552 		VN_RELE(rp->cfs_req_u.cu_putpage.cp_vp);
2553 		break;
2554 	case CFS_POPULATE:
2555 		cachefs_async_populate(&rp->cfs_req_u.cu_populate, rp->cfs_cr);
2556 		VN_RELE(rp->cfs_req_u.cu_populate.cpop_vp);
2557 		break;
2558 	case CFS_NOOP:
2559 		break;
2560 	default:
2561 		panic("c_do_req: Invalid CFS async operation");
2562 	}
2563 	crfree(rp->cfs_cr);
2564 	rp->cfs_cmd = CFS_INVALID;
2565 	mutex_exit(&rp->cfs_req_lock);
2566 	kmem_cache_free(cachefs_req_cache, rp);
2567 }
2568 
2569 
2570 
2571 
/*
 * Running total, in bytes, of memory handed out by cachefs_kmem_alloc()
 * and cachefs_kmem_zalloc() and not yet returned through
 * cachefs_kmem_free().  Only maintained in DEBUG builds, where it counts
 * the padded allocation sizes (payload plus both wrappers).
 */
ssize_t cachefs_mem_usage = 0;

/*
 * Guard record used by the DEBUG versions of the cachefs kmem wrappers.
 * One copy sits at the very start and one at the very end of every
 * allocation; cachefs_kmem_free() checks both before freeing.
 */
struct km_wrap {
	size_t kw_size;			/* total (padded) allocation size */
	struct km_wrap *kw_other;	/* the guard at the opposite end */
};

/* held while cachefs_mem_usage is updated */
kmutex_t cachefs_kmem_lock;
2580 
2581 void *
2582 cachefs_kmem_alloc(size_t size, int flag)
2583 {
2584 #ifdef DEBUG
2585 	caddr_t mp = NULL;
2586 	struct km_wrap *kwp;
2587 	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
2588 
2589 	ASSERT(n >= (size + 8));
2590 	mp = kmem_alloc(n, flag);
2591 	if (mp == NULL) {
2592 		return (NULL);
2593 	}
2594 	/*LINTED alignment okay*/
2595 	kwp = (struct km_wrap *)mp;
2596 	kwp->kw_size = n;
2597 	/*LINTED alignment okay*/
2598 	kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
2599 	kwp = (struct km_wrap *)kwp->kw_other;
2600 	kwp->kw_size = n;
2601 	/*LINTED alignment okay*/
2602 	kwp->kw_other = (struct km_wrap *)mp;
2603 
2604 	mutex_enter(&cachefs_kmem_lock);
2605 	ASSERT(cachefs_mem_usage >= 0);
2606 	cachefs_mem_usage += n;
2607 	mutex_exit(&cachefs_kmem_lock);
2608 
2609 	return (mp + sizeof (struct km_wrap));
2610 #else /* DEBUG */
2611 	return (kmem_alloc(size, flag));
2612 #endif /* DEBUG */
2613 }
2614 
2615 void *
2616 cachefs_kmem_zalloc(size_t size, int flag)
2617 {
2618 #ifdef DEBUG
2619 	caddr_t mp = NULL;
2620 	struct km_wrap *kwp;
2621 	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
2622 
2623 	ASSERT(n >= (size + 8));
2624 	mp = kmem_zalloc(n, flag);
2625 	if (mp == NULL) {
2626 		return (NULL);
2627 	}
2628 	/*LINTED alignment okay*/
2629 	kwp = (struct km_wrap *)mp;
2630 	kwp->kw_size = n;
2631 	/*LINTED alignment okay*/
2632 	kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
2633 	kwp = (struct km_wrap *)kwp->kw_other;
2634 	kwp->kw_size = n;
2635 	/*LINTED alignment okay*/
2636 	kwp->kw_other = (struct km_wrap *)mp;
2637 
2638 	mutex_enter(&cachefs_kmem_lock);
2639 	ASSERT(cachefs_mem_usage >= 0);
2640 	cachefs_mem_usage += n;
2641 	mutex_exit(&cachefs_kmem_lock);
2642 
2643 	return (mp + sizeof (struct km_wrap));
2644 #else /* DEBUG */
2645 	return (kmem_zalloc(size, flag));
2646 #endif /* DEBUG */
2647 }
2648 
2649 void
2650 cachefs_kmem_free(void *mp, size_t size)
2651 {
2652 #ifdef DEBUG
2653 	struct km_wrap *front_kwp;
2654 	struct km_wrap *back_kwp;
2655 	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
2656 	void *p;
2657 
2658 	ASSERT(n >= (size + 8));
2659 	front_kwp = (struct km_wrap *)((uintptr_t)mp - sizeof (struct km_wrap));
2660 	back_kwp = (struct km_wrap *)
2661 	    ((uintptr_t)front_kwp + n - sizeof (struct km_wrap));
2662 
2663 	ASSERT(front_kwp->kw_other == back_kwp);
2664 	ASSERT(front_kwp->kw_size == n);
2665 	ASSERT(back_kwp->kw_other == front_kwp);
2666 	ASSERT(back_kwp->kw_size == n);
2667 
2668 	mutex_enter(&cachefs_kmem_lock);
2669 	cachefs_mem_usage -= n;
2670 	ASSERT(cachefs_mem_usage >= 0);
2671 	mutex_exit(&cachefs_kmem_lock);
2672 
2673 	p = front_kwp;
2674 	front_kwp->kw_size = back_kwp->kw_size = 0;
2675 	front_kwp->kw_other = back_kwp->kw_other = NULL;
2676 	kmem_free(p, n);
2677 #else /* DEBUG */
2678 	kmem_free(mp, size);
2679 #endif /* DEBUG */
2680 }
2681 
2682 char *
2683 cachefs_strdup(char *s)
2684 {
2685 	char *rc;
2686 
2687 	ASSERT(s != NULL);
2688 
2689 	rc = cachefs_kmem_alloc(strlen(s) + 1, KM_SLEEP);
2690 	(void) strcpy(rc, s);
2691 
2692 	return (rc);
2693 }
2694 
2695 int
2696 cachefs_stats_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
2697 {
2698 	struct fscache *fscp = (struct fscache *)ksp->ks_data;
2699 	cachefscache_t *cachep = fscp->fs_cache;
2700 	int	error = 0;
2701 
2702 	if (rw == KSTAT_WRITE) {
2703 		bcopy(buf, &fscp->fs_stats, sizeof (fscp->fs_stats));
2704 		cachep->c_gc_count = fscp->fs_stats.st_gc_count;
2705 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_time,
2706 		    cachep->c_gc_time);
2707 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_before_atime,
2708 		    cachep->c_gc_before);
2709 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_after_atime,
2710 		    cachep->c_gc_after);
2711 		return (error);
2712 	}
2713 
2714 	fscp->fs_stats.st_gc_count = cachep->c_gc_count;
2715 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_time,
2716 	    fscp->fs_stats.st_gc_time, error);
2717 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_before,
2718 	    fscp->fs_stats.st_gc_before_atime, error);
2719 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_after,
2720 	    fscp->fs_stats.st_gc_after_atime, error);
2721 	bcopy(&fscp->fs_stats, buf, sizeof (fscp->fs_stats));
2722 
2723 	return (error);
2724 }
2725 
2726 #ifdef DEBUG
2727 cachefs_debug_info_t *
2728 cachefs_debug_save(cachefs_debug_info_t *oldcdb, int chain,
2729     char *message, uint_t flags, int number, void *pointer,
2730     cachefscache_t *cachep, struct fscache *fscp, struct cnode *cp)
2731 {
2732 	cachefs_debug_info_t *cdb;
2733 
2734 	if ((chain) || (oldcdb == NULL))
2735 		cdb = cachefs_kmem_zalloc(sizeof (*cdb), KM_SLEEP);
2736 	else
2737 		cdb = oldcdb;
2738 	if (chain)
2739 		cdb->cdb_next = oldcdb;
2740 
2741 	if (message != NULL) {
2742 		if (cdb->cdb_message != NULL)
2743 			cachefs_kmem_free(cdb->cdb_message,
2744 			    strlen(cdb->cdb_message) + 1);
2745 		cdb->cdb_message = cachefs_kmem_alloc(strlen(message) + 1,
2746 		    KM_SLEEP);
2747 		(void) strcpy(cdb->cdb_message, message);
2748 	}
2749 	cdb->cdb_flags = flags;
2750 	cdb->cdb_int = number;
2751 	cdb->cdb_pointer = pointer;
2752 
2753 	cdb->cdb_count++;
2754 
2755 	cdb->cdb_cnode = cp;
2756 	if (cp != NULL) {
2757 		cdb->cdb_frontvp = cp->c_frontvp;
2758 		cdb->cdb_backvp = cp->c_backvp;
2759 	}
2760 	if (fscp != NULL)
2761 		cdb->cdb_fscp = fscp;
2762 	else if (cp != NULL)
2763 		cdb->cdb_fscp = C_TO_FSCACHE(cp);
2764 	if (cachep != NULL)
2765 		cdb->cdb_cachep = cachep;
2766 	else if (cdb->cdb_fscp != NULL)
2767 		cdb->cdb_cachep = cdb->cdb_fscp->fs_cache;
2768 
2769 	cdb->cdb_thread = curthread;
2770 	cdb->cdb_timestamp = gethrtime();
2771 	cdb->cdb_depth = getpcstack(cdb->cdb_stack, CACHEFS_DEBUG_DEPTH);
2772 
2773 	return (cdb);
2774 }
2775 
2776 void
2777 cachefs_debug_show(cachefs_debug_info_t *cdb)
2778 {
2779 	hrtime_t now = gethrtime();
2780 	timestruc_t ts;
2781 	int i;
2782 
2783 	while (cdb != NULL) {
2784 		hrt2ts(now - cdb->cdb_timestamp, &ts);
2785 		printf("cdb: %p count: %d timelapse: %ld.%9ld\n",
2786 		    (void *)cdb, cdb->cdb_count, ts.tv_sec, ts.tv_nsec);
2787 		if (cdb->cdb_message != NULL)
2788 			printf("message: %s", cdb->cdb_message);
2789 		printf("flags: %x int: %d pointer: %p\n",
2790 		    cdb->cdb_flags, cdb->cdb_int, (void *)cdb->cdb_pointer);
2791 
2792 		printf("cnode: %p fscp: %p cachep: %p\n",
2793 		    (void *)cdb->cdb_cnode,
2794 		    (void *)cdb->cdb_fscp, (void *)cdb->cdb_cachep);
2795 		printf("frontvp: %p backvp: %p\n",
2796 		    (void *)cdb->cdb_frontvp, (void *)cdb->cdb_backvp);
2797 
2798 		printf("thread: %p stack...\n", (void *)cdb->cdb_thread);
2799 		for (i = 0; i < cdb->cdb_depth; i++) {
2800 			ulong_t off;
2801 			char *sym;
2802 
2803 			sym = kobj_getsymname(cdb->cdb_stack[i], &off);
2804 			printf("%s+%lx\n", sym ? sym : "?", off);
2805 		}
2806 		delay(2*hz);
2807 		cdb = cdb->cdb_next;
2808 	}
2809 	debug_enter(NULL);
2810 }
2811 #endif /* DEBUG */
2812 
2813 /*
2814  * Changes the size of the front file.
2815  * Returns 0 for success or error if cannot set file size.
2816  * NOCACHE bit is ignored.
2817  * c_size is ignored.
2818  * statelock must be held, frontvp must be set.
2819  * File must be populated if setting to a size other than zero.
2820  */
2821 int
2822 cachefs_frontfile_size(cnode_t *cp, u_offset_t length)
2823 {
2824 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
2825 	vattr_t va;
2826 	size_t nblks, blkdelta;
2827 	int error = 0;
2828 	int alloc = 0;
2829 	struct cachefs_allocmap *allocp;
2830 
2831 	ASSERT(MUTEX_HELD(&cp->c_statelock));
2832 	ASSERT(cp->c_frontvp);
2833 
2834 	/* if growing the file, allocate space first, we charge for holes */
2835 	if (length) {
2836 		ASSERT(cp->c_metadata.md_flags & MD_POPULATED);
2837 
2838 		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
2839 		if (nblks > cp->c_metadata.md_frontblks) {
2840 			blkdelta = nblks - cp->c_metadata.md_frontblks;
2841 			error = cachefs_allocblocks(cachep, blkdelta,
2842 			    cp->c_metadata.md_rltype);
2843 			if (error)
2844 				goto out;
2845 			alloc = 1;
2846 		}
2847 	}
2848 
2849 	/* change the size of the front file */
2850 	va.va_mask = AT_SIZE;
2851 	va.va_size = length;
2852 	error = VOP_SETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
2853 	if (error)
2854 		goto out;
2855 
2856 	/* zero out the alloc map */
2857 	bzero(&cp->c_metadata.md_allocinfo,
2858 	    cp->c_metadata.md_allocents * sizeof (struct cachefs_allocmap));
2859 	cp->c_metadata.md_allocents = 0;
2860 
2861 	if (length == 0) {
2862 		/* free up blocks */
2863 		if (cp->c_metadata.md_frontblks) {
2864 			cachefs_freeblocks(cachep, cp->c_metadata.md_frontblks,
2865 			    cp->c_metadata.md_rltype);
2866 			cp->c_metadata.md_frontblks = 0;
2867 		}
2868 	} else {
2869 		/* update number of blocks if shrinking file */
2870 		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
2871 		if (nblks < cp->c_metadata.md_frontblks) {
2872 			blkdelta = cp->c_metadata.md_frontblks - nblks;
2873 			cachefs_freeblocks(cachep, blkdelta,
2874 			    cp->c_metadata.md_rltype);
2875 			cp->c_metadata.md_frontblks = (uint_t)nblks;
2876 		}
2877 
2878 		/* fix up alloc map to reflect new size */
2879 		allocp = cp->c_metadata.md_allocinfo;
2880 		allocp->am_start_off = 0;
2881 		allocp->am_size = length;
2882 		cp->c_metadata.md_allocents = 1;
2883 	}
2884 	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
2885 
2886 out:
2887 	if (error && alloc)
2888 		cachefs_freeblocks(cachep, blkdelta, cp->c_metadata.md_rltype);
2889 	return (error);
2890 }
2891 
2892 /*ARGSUSED*/
2893 int
2894 cachefs_req_create(void *voidp, void *cdrarg, int kmflags)
2895 {
2896 	struct cachefs_req *rp = (struct cachefs_req *)voidp;
2897 
2898 	/*
2899 	 * XXX don't do this!  if you need this, you can't use this
2900 	 * constructor.
2901 	 */
2902 
2903 	bzero(rp, sizeof (struct cachefs_req));
2904 
2905 	mutex_init(&rp->cfs_req_lock, NULL, MUTEX_DEFAULT, NULL);
2906 	return (0);
2907 }
2908 
2909 /*ARGSUSED*/
2910 void
2911 cachefs_req_destroy(void *voidp, void *cdrarg)
2912 {
2913 	struct cachefs_req *rp = (struct cachefs_req *)voidp;
2914 
2915 	mutex_destroy(&rp->cfs_req_lock);
2916 }
2917