xref: /titanic_44/usr/src/uts/common/fs/cachefs/cachefs_subr.c (revision 22eb7cb54d8a6bcf6fe2674cb4b1f0cf2d85cfb6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/types.h>
30 #include <sys/systm.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/pathname.h>
37 #include <sys/uio.h>
38 #include <sys/tiuser.h>
39 #include <sys/sysmacros.h>
40 #include <sys/kmem.h>
41 #include <sys/mount.h>
42 #include <sys/ioctl.h>
43 #include <sys/statvfs.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/cmn_err.h>
47 #include <sys/utsname.h>
48 #include <sys/modctl.h>
49 #include <sys/file.h>
50 #include <sys/stat.h>
51 #include <sys/fcntl.h>
52 #include <sys/fbuf.h>
53 #include <sys/dnlc.h>
54 #include <sys/callb.h>
55 #include <sys/kobj.h>
56 #include <sys/rwlock.h>
57 
58 #include <sys/vmsystm.h>
59 #include <vm/hat.h>
60 #include <vm/as.h>
61 #include <vm/page.h>
62 #include <vm/pvn.h>
63 #include <vm/seg.h>
64 #include <vm/seg_map.h>
65 #include <vm/seg_vn.h>
66 #include <vm/rm.h>
67 #include <sys/fs/cachefs_fs.h>
68 #include <sys/fs/cachefs_log.h>
69 #include <sys/fs/cachefs_dir.h>
70 
71 extern struct seg *segkmap;
72 caddr_t segmap_getmap();
73 int segmap_release();
74 
75 extern struct cnode *cachefs_freeback;
76 extern struct cnode *cachefs_freefront;
77 extern cachefscache_t *cachefs_cachelist;
78 
79 #ifdef CFSDEBUG
80 int cachefsdebug = 0;
81 #endif
82 
83 int cachefs_max_threads = CFS_MAX_THREADS;
84 ino64_t cachefs_check_fileno = 0;
85 struct kmem_cache *cachefs_cache_kmcache = NULL;
86 struct kmem_cache *cachefs_req_cache = NULL;
87 
88 static int
89 cachefs_async_populate_reg(struct cachefs_populate_req *, cred_t *,
90     vnode_t *, vnode_t *);
91 
92 /*
93  * Cache routines
94  */
95 
96 /*
97  * ------------------------------------------------------------------
98  *
99  *		cachefs_cache_create
100  *
101  * Description:
102  *	Creates a cachefscache_t object and initializes it to
103  *	be NOCACHE and NOFILL mode.
104  * Arguments:
105  * Returns:
106  *	Returns a pointer to the created object or NULL if
107  *	threads could not be created.
108  * Preconditions:
109  */
110 
111 cachefscache_t *
112 cachefs_cache_create(void)
113 {
114 	cachefscache_t *cachep;
115 	struct cachefs_req *rp;
116 
117 	/* allocate zeroed memory for the object */
118 	cachep = kmem_cache_alloc(cachefs_cache_kmcache, KM_SLEEP);
119 
120 	bzero(cachep, sizeof (*cachep));
121 
122 	cv_init(&cachep->c_cwcv, NULL, CV_DEFAULT, NULL);
123 	cv_init(&cachep->c_cwhaltcv, NULL, CV_DEFAULT, NULL);
124 	mutex_init(&cachep->c_contentslock, NULL, MUTEX_DEFAULT, NULL);
125 	mutex_init(&cachep->c_fslistlock, NULL, MUTEX_DEFAULT, NULL);
126 	mutex_init(&cachep->c_log_mutex, NULL, MUTEX_DEFAULT, NULL);
127 
128 	/* set up the work queue and get the sync thread created */
129 	cachefs_workq_init(&cachep->c_workq);
130 	cachep->c_workq.wq_keepone = 1;
131 	cachep->c_workq.wq_cachep = cachep;
132 	rp = kmem_cache_alloc(cachefs_req_cache, KM_SLEEP);
133 	rp->cfs_cmd = CFS_NOOP;
134 	rp->cfs_cr = kcred;
135 	rp->cfs_req_u.cu_fs_sync.cf_cachep = cachep;
136 	crhold(rp->cfs_cr);
137 	cachefs_addqueue(rp, &cachep->c_workq);
138 	cachep->c_flags |= CACHE_NOCACHE | CACHE_NOFILL | CACHE_ALLOC_PENDING;
139 
140 	return (cachep);
141 }
142 
143 /*
144  * ------------------------------------------------------------------
145  *
146  *		cachefs_cache_destroy
147  *
148  * Description:
149  *	Destroys the cachefscache_t object.
150  * Arguments:
151  *	cachep	the cachefscache_t object to destroy
152  * Returns:
153  * Preconditions:
154  *	precond(cachep)
155  */
156 
157 void
158 cachefs_cache_destroy(cachefscache_t *cachep)
159 {
160 	clock_t tend;
161 	int error = 0;
162 #ifdef CFSRLDEBUG
163 	uint_t index;
164 #endif /* CFSRLDEBUG */
165 
166 	/* stop async threads */
167 	while (cachep->c_workq.wq_thread_count > 0)
168 		(void) cachefs_async_halt(&cachep->c_workq, 1);
169 
170 	/* kill off the cachep worker thread */
171 	mutex_enter(&cachep->c_contentslock);
172 	while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
173 		cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
174 		cv_signal(&cachep->c_cwcv);
175 		tend = lbolt + (60 * hz);
176 		(void) cv_timedwait(&cachep->c_cwhaltcv,
177 			&cachep->c_contentslock, tend);
178 	}
179 
180 	if ((cachep->c_flags & CACHE_ALLOC_PENDING) == 0) {
181 		cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
182 		(void) cachefs_cache_rssync(cachep);
183 	}
184 	mutex_exit(&cachep->c_contentslock);
185 
186 	/* if there is a cache */
187 	if ((cachep->c_flags & CACHE_NOCACHE) == 0) {
188 		if ((cachep->c_flags & CACHE_NOFILL) == 0) {
189 #ifdef CFSRLDEBUG
190 			/* blow away dangling rl debugging info */
191 			mutex_enter(&cachep->c_contentslock);
192 			for (index = 0;
193 			    index <= cachep->c_rlinfo.rl_entries;
194 			    index++) {
195 				rl_entry_t *rlent;
196 
197 				error = cachefs_rl_entry_get(cachep, index,
198 									rlent);
199 				/*
200 				 * Since we are destroying the cache,
201 				 * better to ignore and proceed
202 				 */
203 				if (error)
204 					break;
205 				cachefs_rl_debug_destroy(rlent);
206 			}
207 			mutex_exit(&cachep->c_contentslock);
208 #endif /* CFSRLDEBUG */
209 
210 			/* sync the cache */
211 			if (!error)
212 				cachefs_cache_sync(cachep);
213 		} else {
214 			/* get rid of any unused fscache objects */
215 			mutex_enter(&cachep->c_fslistlock);
216 			fscache_list_gc(cachep);
217 			mutex_exit(&cachep->c_fslistlock);
218 		}
219 		ASSERT(cachep->c_fslist == NULL);
220 
221 		VN_RELE(cachep->c_resfilevp);
222 		VN_RELE(cachep->c_dirvp);
223 		VN_RELE(cachep->c_lockvp);
224 		VN_RELE(cachep->c_lostfoundvp);
225 	}
226 
227 	if (cachep->c_log_ctl != NULL)
228 		cachefs_kmem_free(cachep->c_log_ctl,
229 		    sizeof (cachefs_log_control_t));
230 	if (cachep->c_log != NULL)
231 		cachefs_log_destroy_cookie(cachep->c_log);
232 
233 	cv_destroy(&cachep->c_cwcv);
234 	cv_destroy(&cachep->c_cwhaltcv);
235 	mutex_destroy(&cachep->c_contentslock);
236 	mutex_destroy(&cachep->c_fslistlock);
237 	mutex_destroy(&cachep->c_log_mutex);
238 
239 	kmem_cache_free(cachefs_cache_kmcache, cachep);
240 }
241 
242 /*
243  * ------------------------------------------------------------------
244  *
245  *		cachefs_cache_active_ro
246  *
247  * Description:
248  *	Activates the cachefscache_t object for a read-only file system.
249  * Arguments:
250  *	cachep	the cachefscache_t object to activate
251  *	cdvp	the vnode of the cache directory
252  * Returns:
253  *	Returns 0 for success, !0 if there is a problem with the cache.
254  * Preconditions:
255  *	precond(cachep)
256  *	precond(cdvp)
257  *	precond(cachep->c_flags & CACHE_NOCACHE)
258  */
259 
260 int
261 cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp)
262 {
263 	cachefs_log_control_t *lc;
264 	vnode_t *labelvp = NULL;
265 	vnode_t *rifvp = NULL;
266 	vnode_t *lockvp = NULL;
267 	vnode_t *statevp = NULL;
268 	vnode_t *lostfoundvp = NULL;
269 	struct vattr *attrp = NULL;
270 	int error;
271 
272 	ASSERT(cachep->c_flags & CACHE_NOCACHE);
273 	mutex_enter(&cachep->c_contentslock);
274 
275 	attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);
276 
277 	/* get the mode bits of the cache directory */
278 	attrp->va_mask = AT_ALL;
279 	error = VOP_GETATTR(cdvp, attrp, 0, kcred, NULL);
280 	if (error)
281 		goto out;
282 
283 	/* ensure the mode bits are 000 to keep out casual users */
284 	if (attrp->va_mode & S_IAMB) {
285 		cmn_err(CE_WARN, "cachefs: Cache Directory Mode must be 000\n");
286 		error = EPERM;
287 		goto out;
288 	}
289 
290 	/* Get the lock file */
291 	error = VOP_LOOKUP(cdvp, CACHEFS_LOCK_FILE, &lockvp, NULL, 0, NULL,
292 		kcred, NULL, NULL, NULL);
293 	if (error) {
294 		cmn_err(CE_WARN, "cachefs: activate_a: cache corruption"
295 			" run fsck.\n");
296 		goto out;
297 	}
298 
299 	/* Get the label file */
300 	error = VOP_LOOKUP(cdvp, CACHELABEL_NAME, &labelvp, NULL, 0, NULL,
301 		kcred, NULL, NULL, NULL);
302 	if (error) {
303 		cmn_err(CE_WARN, "cachefs: activate_b: cache corruption"
304 			" run fsck.\n");
305 		goto out;
306 	}
307 
308 	/* read in the label */
309 	error = vn_rdwr(UIO_READ, labelvp, (caddr_t)&cachep->c_label,
310 			sizeof (struct cache_label), 0LL, UIO_SYSSPACE,
311 			0, (rlim64_t)0, kcred, NULL);
312 	if (error) {
313 		cmn_err(CE_WARN, "cachefs: activate_c: cache corruption"
314 			" run fsck.\n");
315 		goto out;
316 	}
317 
318 	/* Verify that we can handle the version this cache was created under */
319 	if (cachep->c_label.cl_cfsversion != CFSVERSION) {
320 		cmn_err(CE_WARN, "cachefs: Invalid Cache Version, run fsck\n");
321 		error = EINVAL;
322 		goto out;
323 	}
324 
325 	/* Open the resource file */
326 	error = VOP_LOOKUP(cdvp, RESOURCE_NAME, &rifvp, NULL, 0, NULL, kcred,
327 	    NULL, NULL, NULL);
328 	if (error) {
329 		cmn_err(CE_WARN, "cachefs: activate_d: cache corruption"
330 			" run fsck.\n");
331 		goto out;
332 	}
333 
334 	/*  Read the usage struct for this cache */
335 	error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_usage,
336 			sizeof (struct cache_usage), 0LL, UIO_SYSSPACE, 0,
337 			(rlim64_t)0, kcred, NULL);
338 	if (error) {
339 		cmn_err(CE_WARN, "cachefs: activate_e: cache corruption"
340 			" run fsck.\n");
341 		goto out;
342 	}
343 
344 	if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
345 		cmn_err(CE_WARN, "cachefs: cache not clean.  Run fsck\n");
346 		/* ENOSPC is what UFS uses for clean flag check */
347 		error = ENOSPC;
348 		goto out;
349 	}
350 
351 	/*  Read the rlinfo for this cache */
352 	error = vn_rdwr(UIO_READ, rifvp, (caddr_t)&cachep->c_rlinfo,
353 	sizeof (cachefs_rl_info_t), (offset_t)sizeof (struct cache_usage),
354 			UIO_SYSSPACE, 0, 0, kcred, NULL);
355 	if (error) {
356 		cmn_err(CE_WARN, "cachefs: activate_f: cache corruption"
357 			" run fsck.\n");
358 		goto out;
359 	}
360 
361 	/* Open the lost+found directory */
362 	error = VOP_LOOKUP(cdvp, CACHEFS_LOSTFOUND_NAME, &lostfoundvp,
363 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
364 	if (error) {
365 		cmn_err(CE_WARN, "cachefs: activate_g: cache corruption"
366 			" run fsck.\n");
367 		goto out;
368 	}
369 
370 	VN_HOLD(rifvp);
371 	VN_HOLD(cdvp);
372 	VN_HOLD(lockvp);
373 	VN_HOLD(lostfoundvp);
374 	cachep->c_resfilevp = rifvp;
375 	cachep->c_dirvp = cdvp;
376 	cachep->c_lockvp = lockvp;
377 	cachep->c_lostfoundvp = lostfoundvp;
378 
379 	/* get the cachep worker thread created */
380 	cachep->c_flags |= CACHE_CACHEW_THREADRUN;
381 	(void) thread_create(NULL, 0, cachefs_cachep_worker_thread,
382 	    cachep, 0, &p0, TS_RUN, minclsyspri);
383 
384 	/* allocate the `logging control' field */
385 	mutex_enter(&cachep->c_log_mutex);
386 	cachep->c_log_ctl =
387 	    cachefs_kmem_zalloc(sizeof (cachefs_log_control_t), KM_SLEEP);
388 	lc = (cachefs_log_control_t *)cachep->c_log_ctl;
389 
390 	/* if the LOG_STATUS_NAME file exists, read it in and set up logging */
391 	error = VOP_LOOKUP(cachep->c_dirvp, LOG_STATUS_NAME, &statevp,
392 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
393 	if (error == 0) {
394 		int vnrw_error;
395 
396 		vnrw_error = vn_rdwr(UIO_READ, statevp, (caddr_t)lc,
397 		    sizeof (*lc), 0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY,
398 		    kcred, NULL);
399 		VN_RELE(statevp);
400 
401 		if (vnrw_error == 0) {
402 			if ((cachep->c_log = cachefs_log_create_cookie(lc))
403 			    == NULL)
404 				cachefs_log_error(cachep, ENOMEM, 0);
405 			else if ((lc->lc_magic != CACHEFS_LOG_MAGIC) ||
406 			    (lc->lc_path[0] != '/') ||
407 			    (cachefs_log_logfile_open(cachep,
408 			    lc->lc_path) != 0))
409 				cachefs_log_error(cachep, EINVAL, 0);
410 		}
411 	} else {
412 		error = 0;
413 	}
414 	lc->lc_magic = CACHEFS_LOG_MAGIC;
415 	lc->lc_cachep = (uint64_t)(uintptr_t)cachep;
416 	mutex_exit(&cachep->c_log_mutex);
417 
418 out:
419 	if (error == 0) {
420 		cachep->c_flags &= ~(CACHE_NOCACHE | CACHE_ALLOC_PENDING);
421 	}
422 	if (attrp)
423 		cachefs_kmem_free(attrp, sizeof (struct vattr));
424 	if (labelvp != NULL)
425 		VN_RELE(labelvp);
426 	if (rifvp != NULL)
427 		VN_RELE(rifvp);
428 	if (lockvp)
429 		VN_RELE(lockvp);
430 	if (lostfoundvp)
431 		VN_RELE(lostfoundvp);
432 
433 	mutex_exit(&cachep->c_contentslock);
434 	return (error);
435 }
436 
437 int
438 cachefs_stop_cache(cnode_t *cp)
439 {
440 	fscache_t *fscp = C_TO_FSCACHE(cp);
441 	cachefscache_t *cachep = fscp->fs_cache;
442 	filegrp_t *fgp;
443 	int i;
444 	clock_t tend;
445 	int error = 0;
446 
447 	/* XXX verify lock-ordering for this function */
448 
449 	mutex_enter(&cachep->c_contentslock);
450 
451 	/*
452 	 * no work if we're already in nocache mode.  hopefully this
453 	 * will be the usual case.
454 	 */
455 
456 	if (cachep->c_flags & CACHE_NOCACHE) {
457 		mutex_exit(&cachep->c_contentslock);
458 		return (0);
459 	}
460 
461 	if ((cachep->c_flags & CACHE_NOFILL) == 0) {
462 		mutex_exit(&cachep->c_contentslock);
463 		return (EINVAL);
464 	}
465 
466 	mutex_exit(&cachep->c_contentslock);
467 
468 	/* We are already not caching if nfsv4 */
469 	if (CFS_ISFS_BACKFS_NFSV4(fscp)) {
470 		return (0);
471 	}
472 
473 #ifdef CFSDEBUG
474 	mutex_enter(&cachep->c_fslistlock);
475 	ASSERT(fscp == cachep->c_fslist);
476 	ASSERT(fscp->fs_next == NULL);
477 	mutex_exit(&cachep->c_fslistlock);
478 
479 	printf("cachefs_stop_cache: resetting CACHE_NOCACHE\n");
480 #endif
481 
482 	/* XXX should i worry about disconnected during boot? */
483 	error = cachefs_cd_access(fscp, 1, 1);
484 	if (error)
485 		goto out;
486 
487 	error = cachefs_async_halt(&fscp->fs_workq, 1);
488 	ASSERT(error == 0);
489 	error = cachefs_async_halt(&cachep->c_workq, 1);
490 	ASSERT(error == 0);
491 	/* sigh -- best to keep going if async_halt failed. */
492 	error = 0;
493 
494 	/* XXX current order: cnode, fgp, fscp, cache. okay? */
495 
496 	cachefs_cnode_traverse(fscp, cachefs_cnode_disable_caching);
497 
498 	for (i = 0; i < CFS_FS_FGP_BUCKET_SIZE; i++) {
499 		for (fgp = fscp->fs_filegrp[i]; fgp != NULL;
500 		fgp = fgp->fg_next) {
501 			mutex_enter(&fgp->fg_mutex);
502 
503 			ASSERT((fgp->fg_flags &
504 			    (CFS_FG_WRITE | CFS_FG_UPDATED)) == 0);
505 			fgp->fg_flags |=
506 			    CFS_FG_ALLOC_FILE |
507 			    CFS_FG_ALLOC_ATTR;
508 			fgp->fg_flags &= ~CFS_FG_READ;
509 
510 			if (fgp->fg_dirvp) {
511 				fgp->fg_flags |= CFS_FG_ALLOC_FILE;
512 				VN_RELE(fgp->fg_dirvp);
513 				fgp->fg_dirvp = NULL;
514 			}
515 			if (fgp->fg_attrvp) {
516 				fgp->fg_flags |= CFS_FG_ALLOC_ATTR;
517 				VN_RELE(fgp->fg_attrvp);
518 				fgp->fg_attrvp = NULL;
519 			}
520 
521 			mutex_exit(&fgp->fg_mutex);
522 		}
523 	}
524 
525 	mutex_enter(&fscp->fs_fslock);
526 	ASSERT((fscp->fs_flags & (CFS_FS_WRITE)) == 0);
527 	fscp->fs_flags &= ~(CFS_FS_READ | CFS_FS_DIRTYINFO);
528 
529 	if (fscp->fs_fscdirvp) {
530 		VN_RELE(fscp->fs_fscdirvp);
531 		fscp->fs_fscdirvp = NULL;
532 	}
533 	if (fscp->fs_fsattrdir) {
534 		VN_RELE(fscp->fs_fsattrdir);
535 		fscp->fs_fsattrdir = NULL;
536 	}
537 	if (fscp->fs_infovp) {
538 		VN_RELE(fscp->fs_infovp);
539 		fscp->fs_infovp = NULL;
540 	}
541 	/* XXX dlog stuff? */
542 
543 	mutex_exit(&fscp->fs_fslock);
544 
545 	/*
546 	 * release resources grabbed in cachefs_cache_activate_ro
547 	 */
548 
549 	mutex_enter(&cachep->c_contentslock);
550 
551 	/* kill off the cachep worker thread */
552 	while (cachep->c_flags & CACHE_CACHEW_THREADRUN) {
553 		cachep->c_flags |= CACHE_CACHEW_THREADEXIT;
554 		cv_signal(&cachep->c_cwcv);
555 		tend = lbolt + (60 * hz);
556 		(void) cv_timedwait(&cachep->c_cwhaltcv,
557 			&cachep->c_contentslock, tend);
558 	}
559 
560 	if (cachep->c_resfilevp) {
561 		VN_RELE(cachep->c_resfilevp);
562 		cachep->c_resfilevp = NULL;
563 	}
564 	if (cachep->c_dirvp) {
565 		VN_RELE(cachep->c_dirvp);
566 		cachep->c_dirvp = NULL;
567 	}
568 	if (cachep->c_lockvp) {
569 		VN_RELE(cachep->c_lockvp);
570 		cachep->c_lockvp = NULL;
571 	}
572 	if (cachep->c_lostfoundvp) {
573 		VN_RELE(cachep->c_lostfoundvp);
574 		cachep->c_lostfoundvp = NULL;
575 	}
576 
577 	mutex_enter(&cachep->c_log_mutex);
578 	if (cachep->c_log_ctl) {
579 		cachefs_kmem_free(cachep->c_log_ctl,
580 		    sizeof (cachefs_log_control_t));
581 		cachep->c_log_ctl = NULL;
582 	}
583 	if (cachep->c_log) {
584 		cachefs_log_destroy_cookie(cachep->c_log);
585 		cachep->c_log = NULL;
586 	}
587 	mutex_exit(&cachep->c_log_mutex);
588 
589 	/* XXX do what mountroot_init does when ! foundcache */
590 
591 	cachep->c_flags |= CACHE_NOCACHE;
592 	mutex_exit(&cachep->c_contentslock);
593 
594 	/* XXX should i release this here? */
595 	cachefs_cd_release(fscp);
596 
597 out:
598 
599 	return (error);
600 }
601 
602 /*
603  * ------------------------------------------------------------------
604  *
605  *		cachefs_cache_active_rw
606  *
607  * Description:
608  *	Activates the cachefscache_t object for a read-write file system.
609  * Arguments:
610  *	cachep	the cachefscache_t object to activate
611  * Returns:
612  * Preconditions:
613  *	precond(cachep)
614  *	precond((cachep->c_flags & CACHE_NOCACHE) == 0)
615  *	precond(cachep->c_flags & CACHE_NOFILL)
616  */
617 
618 void
619 cachefs_cache_activate_rw(cachefscache_t *cachep)
620 {
621 	cachefs_rl_listhead_t *lhp;
622 
623 	ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
624 	ASSERT(cachep->c_flags & CACHE_NOFILL);
625 
626 	mutex_enter(&cachep->c_contentslock);
627 	cachep->c_flags &= ~CACHE_NOFILL;
628 
629 	/* move the active list to the rl list */
630 	cachefs_rl_cleanup(cachep);
631 
632 	lhp = &cachep->c_rlinfo.rl_items[
633 	    CACHEFS_RL_INDEX(CACHEFS_RL_PACKED_PENDING)];
634 	if (lhp->rli_itemcnt != 0)
635 		cachep->c_flags |= CACHE_PACKED_PENDING;
636 	cachefs_cache_dirty(cachep, 0);
637 	mutex_exit(&cachep->c_contentslock);
638 }
639 
640 /*
641  * ------------------------------------------------------------------
642  *
643  *		cachefs_cache_dirty
644  *
645  * Description:
646  *	Marks the cache as dirty (active).
647  * Arguments:
648  *	cachep	the cachefscache_t to mark as dirty
649  *	lockit	1 means grab contents lock, 0 means caller grabbed it
650  * Returns:
651  * Preconditions:
652  *	precond(cachep)
653  *	precond(cache is in rw mode)
654  */
655 
656 void
657 cachefs_cache_dirty(struct cachefscache *cachep, int lockit)
658 {
659 	int error;
660 
661 	ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL)) == 0);
662 
663 	if (lockit) {
664 		mutex_enter(&cachep->c_contentslock);
665 	} else {
666 		ASSERT(MUTEX_HELD(&cachep->c_contentslock));
667 	}
668 	if (cachep->c_flags & CACHE_DIRTY) {
669 		ASSERT(cachep->c_usage.cu_flags & CUSAGE_ACTIVE);
670 	} else {
671 		/*
672 		 * turn on the "cache active" (dirty) flag and write it
673 		 * synchronously to disk
674 		 */
675 		cachep->c_flags |= CACHE_DIRTY;
676 		cachep->c_usage.cu_flags |= CUSAGE_ACTIVE;
677 		if (error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
678 		    (caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
679 		    0LL, UIO_SYSSPACE, FSYNC, (rlim64_t)RLIM_INFINITY,
680 				kcred, NULL)) {
681 			cmn_err(CE_WARN,
682 			    "cachefs: clean flag write error: %d\n", error);
683 		}
684 	}
685 
686 	if (lockit)
687 		mutex_exit(&cachep->c_contentslock);
688 }
689 
690 /*
691  * ------------------------------------------------------------------
692  *
693  *		cachefs_cache_rssync
694  *
695  * Description:
696  *	Syncs out the resource file for the cachefscache_t object.
697  * Arguments:
698  *	cachep	the cachefscache_t object to operate on
699  * Returns:
700  *	Returns 0 for success, !0 on an error writing data.
701  * Preconditions:
702  *	precond(cachep)
703  *	precond(cache is in rw mode)
704  */
705 
706 int
707 cachefs_cache_rssync(struct cachefscache *cachep)
708 {
709 	int error;
710 
711 	ASSERT((cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL |
712 	    CACHE_ALLOC_PENDING)) == 0);
713 
714 	if (cachep->c_rl_entries != NULL) {
715 		error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
716 		    (caddr_t)cachep->c_rl_entries, MAXBSIZE,
717 		    (offset_t)((cachep->c_rl_window + 1) * MAXBSIZE),
718 		    UIO_SYSSPACE, FSYNC, RLIM_INFINITY, kcred, NULL);
719 		if (error)
720 		    cmn_err(CE_WARN, "cachefs: Can't Write rl entries Info\n");
721 		cachefs_kmem_free(cachep->c_rl_entries, MAXBSIZE);
722 		cachep->c_rl_entries = NULL;
723 	}
724 
725 	/* write the usage struct for this cache */
726 	error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
727 		(caddr_t)&cachep->c_usage, sizeof (struct cache_usage),
728 		0LL, UIO_SYSSPACE, 0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
729 	if (error) {
730 		cmn_err(CE_WARN, "cachefs: Can't Write Cache Usage Info\n");
731 	}
732 
733 	/* write the rlinfo for this cache */
734 	error = vn_rdwr(UIO_WRITE, cachep->c_resfilevp,
735 			(caddr_t)&cachep->c_rlinfo, sizeof (cachefs_rl_info_t),
736 			(offset_t)sizeof (struct cache_usage), UIO_SYSSPACE,
737 			0, (rlim64_t)RLIM_INFINITY, kcred, NULL);
738 	if (error) {
739 		cmn_err(CE_WARN, "cachefs: Can't Write Cache RL Info\n");
740 	}
741 	error = VOP_FSYNC(cachep->c_resfilevp, FSYNC, kcred, NULL);
742 	return (error);
743 }
744 
745 /*
746  * ------------------------------------------------------------------
747  *
748  *		cachefs_cache_sync
749  *
750  * Description:
751  *	Sync a cache which includes all of its fscaches.
752  * Arguments:
753  *	cachep	the cachefscache_t object to sync
754  * Returns:
755  * Preconditions:
756  *	precond(cachep)
757  *	precond(cache is in rw mode)
758  */
759 
760 void
761 cachefs_cache_sync(struct cachefscache *cachep)
762 {
763 	struct fscache *fscp;
764 	struct fscache **syncfsc;
765 	int nfscs, fscidx;
766 	int try;
767 	int done;
768 
769 	if (cachep->c_flags & (CACHE_NOCACHE | CACHE_NOFILL))
770 		return;
771 
772 	done = 0;
773 	for (try = 0; (try < 2) && !done; try++) {
774 
775 		nfscs = 0;
776 
777 		/*
778 		 * here we turn off the cache-wide DIRTY flag.  If it's still
779 		 * off when the sync completes we can write the clean flag to
780 		 * disk telling fsck it has no work to do.
781 		 */
782 #ifdef CFSCLEANFLAG
783 		mutex_enter(&cachep->c_contentslock);
784 		cachep->c_flags &= ~CACHE_DIRTY;
785 		mutex_exit(&cachep->c_contentslock);
786 #endif /* CFSCLEANFLAG */
787 
788 		cachefs_log_process_queue(cachep, 1);
789 
790 		mutex_enter(&cachep->c_fslistlock);
791 		syncfsc = cachefs_kmem_alloc(
792 		    cachep->c_refcnt * sizeof (struct fscache *), KM_SLEEP);
793 		for (fscp = cachep->c_fslist; fscp; fscp = fscp->fs_next) {
794 			fscache_hold(fscp);
795 			ASSERT(nfscs < cachep->c_refcnt);
796 			syncfsc[nfscs++] = fscp;
797 		}
798 		ASSERT(nfscs == cachep->c_refcnt);
799 		mutex_exit(&cachep->c_fslistlock);
800 		for (fscidx = 0; fscidx < nfscs; fscidx++) {
801 			fscp = syncfsc[fscidx];
802 			fscache_sync(fscp);
803 			fscache_rele(fscp);
804 		}
805 
806 		/* get rid of any unused fscache objects */
807 		mutex_enter(&cachep->c_fslistlock);
808 		fscache_list_gc(cachep);
809 		mutex_exit(&cachep->c_fslistlock);
810 
811 		/*
812 		 * here we check the cache-wide DIRTY flag.
813 		 * If it's off,
814 		 * we can write the clean flag to disk.
815 		 */
816 #ifdef CFSCLEANFLAG
817 		mutex_enter(&cachep->c_contentslock);
818 		if ((cachep->c_flags & CACHE_DIRTY) == 0) {
819 			if (cachep->c_usage.cu_flags & CUSAGE_ACTIVE) {
820 				cachep->c_usage.cu_flags &= ~CUSAGE_ACTIVE;
821 				if (cachefs_cache_rssync(cachep) == 0) {
822 					done = 1;
823 				} else {
824 					cachep->c_usage.cu_flags |=
825 						CUSAGE_ACTIVE;
826 				}
827 			} else {
828 				done = 1;
829 			}
830 		}
831 		mutex_exit(&cachep->c_contentslock);
832 #else /* CFSCLEANFLAG */
833 		mutex_enter(&cachep->c_contentslock);
834 		(void) cachefs_cache_rssync(cachep);
835 		mutex_exit(&cachep->c_contentslock);
836 		done = 1;
837 #endif /* CFSCLEANFLAG */
838 		cachefs_kmem_free(syncfsc, nfscs * sizeof (struct fscache *));
839 	}
840 }
841 
842 /*
843  * ------------------------------------------------------------------
844  *
845  *		cachefs_cache_unique
846  *
847  * Description:
848  * Arguments:
849  * Returns:
850  *	Returns a unique number.
851  * Preconditions:
852  *	precond(cachep)
853  */
854 
855 uint_t
856 cachefs_cache_unique(cachefscache_t *cachep)
857 {
858 	uint_t unique = 0;
859 	int error = 0;
860 
861 	mutex_enter(&cachep->c_contentslock);
862 	if (cachep->c_usage.cu_flags & CUSAGE_NEED_ADJUST ||
863 		++(cachep->c_unique) == 0) {
864 		cachep->c_usage.cu_unique++;
865 
866 		if (cachep->c_unique == 0)
867 			cachep->c_unique = 1;
868 		cachep->c_flags &= ~CUSAGE_NEED_ADJUST;
869 		error = cachefs_cache_rssync(cachep);
870 	}
871 	if (error == 0)
872 		unique = (cachep->c_usage.cu_unique << 16) + cachep->c_unique;
873 	mutex_exit(&cachep->c_contentslock);
874 	return (unique);
875 }
876 
877 /*
878  * Called from c_getfrontfile. Shouldn't be called from anywhere else !
879  */
880 static int
881 cachefs_createfrontfile(cnode_t *cp, struct filegrp *fgp)
882 {
883 	char name[CFS_FRONTFILE_NAME_SIZE];
884 	struct vattr *attrp = NULL;
885 	int error = 0;
886 	int mode;
887 	int alloc = 0;
888 	int freefile = 0;
889 	int ffrele = 0;
890 	int rlfree = 0;
891 	rl_entry_t rl_ent;
892 
893 #ifdef CFSDEBUG
894 	CFS_DEBUG(CFSDEBUG_FRONT)
895 		printf("c_createfrontfile: ENTER cp %p fgp %p\n",
896 			(void *)cp, (void *)fgp);
897 #endif
898 
899 	ASSERT(cp->c_frontvp == NULL);
900 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
901 
902 	/* quit if we cannot write to the filegrp */
903 	if ((fgp->fg_flags & CFS_FG_WRITE) == 0) {
904 		error = ENOENT;
905 		goto out;
906 	}
907 
908 	/* find or create the filegrp attrcache file if necessary */
909 	if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
910 		error = filegrp_allocattr(fgp);
911 		if (error)
912 			goto out;
913 	}
914 
915 	make_ascii_name(&cp->c_id, name);
916 
917 	/* set up attributes for the front file we want to create */
918 	attrp = cachefs_kmem_zalloc(sizeof (struct vattr), KM_SLEEP);
919 	alloc++;
920 	attrp->va_mode = S_IFREG | 0666;
921 	mode = 0666;
922 	attrp->va_uid = 0;
923 	attrp->va_gid = 0;
924 	attrp->va_type = VREG;
925 	attrp->va_size = 0;
926 	attrp->va_mask = AT_SIZE | AT_TYPE | AT_MODE | AT_UID | AT_GID;
927 
928 	/* get a file from the resource counts */
929 	error = cachefs_allocfile(fgp->fg_fscp->fs_cache);
930 	if (error) {
931 		error = EINVAL;
932 		goto out;
933 	}
934 	freefile++;
935 
936 	/* create the metadata slot if necessary */
937 	if (cp->c_flags & CN_ALLOC_PENDING) {
938 		error = filegrp_create_metadata(fgp, &cp->c_metadata,
939 		    &cp->c_id);
940 		if (error) {
941 			error = EINVAL;
942 			goto out;
943 		}
944 		cp->c_flags &= ~CN_ALLOC_PENDING;
945 		cp->c_flags |= CN_UPDATED;
946 	}
947 
948 	/* get an rl entry if necessary */
949 	if (cp->c_metadata.md_rlno == 0) {
950 		rl_ent.rl_fileno = cp->c_id.cid_fileno;
951 		rl_ent.rl_local = (cp->c_id.cid_flags & CFS_CID_LOCAL) ? 1 : 0;
952 		rl_ent.rl_fsid = fgp->fg_fscp->fs_cfsid;
953 		rl_ent.rl_attrc = 0;
954 		error = cachefs_rl_alloc(fgp->fg_fscp->fs_cache, &rl_ent,
955 		    &cp->c_metadata.md_rlno);
956 		if (error)
957 			goto out;
958 		cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
959 		    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno,
960 		    cp->c_metadata.md_frontblks);
961 		cp->c_metadata.md_rltype = CACHEFS_RL_ACTIVE;
962 		rlfree++;
963 		cp->c_flags |= CN_UPDATED; /* XXX sam: do we need this? */
964 
965 		/* increment number of front files */
966 		error = filegrp_ffhold(fgp);
967 		if (error) {
968 			error = EINVAL;
969 			goto out;
970 		}
971 		ffrele++;
972 	}
973 
974 	if (cp->c_flags & CN_ASYNC_POP_WORKING) {
975 		/* lookup the already created front file */
976 		error = VOP_LOOKUP(fgp->fg_dirvp, name, &cp->c_frontvp,
977 		    NULL, 0, NULL, kcred, NULL, NULL, NULL);
978 	} else {
979 		/* create the front file */
980 		error = VOP_CREATE(fgp->fg_dirvp, name, attrp, EXCL, mode,
981 		    &cp->c_frontvp, kcred, 0, NULL, NULL);
982 	}
983 	if (error) {
984 #ifdef CFSDEBUG
985 		CFS_DEBUG(CFSDEBUG_FRONT)
986 			printf("c_createfrontfile: Can't create cached object"
987 			    " error %u, fileno %llx\n", error,
988 			    (u_longlong_t)cp->c_id.cid_fileno);
989 #endif
990 		goto out;
991 	}
992 
993 	/* get a copy of the fid of the front file */
994 	cp->c_metadata.md_fid.fid_len = MAXFIDSZ;
995 	error = VOP_FID(cp->c_frontvp, &cp->c_metadata.md_fid, NULL);
996 	if (error) {
997 		/*
998 		 * If we get back ENOSPC then the fid we passed in was too
999 		 * small.  For now we don't do anything and map to EINVAL.
1000 		 */
1001 		if (error == ENOSPC) {
1002 			error = EINVAL;
1003 		}
1004 		goto out;
1005 	}
1006 
1007 	dnlc_purge_vp(cp->c_frontvp);
1008 
1009 	cp->c_metadata.md_flags |= MD_FILE;
1010 	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
1011 
1012 out:
1013 	if (error) {
1014 		if (cp->c_frontvp) {
1015 			VN_RELE(cp->c_frontvp);
1016 			(void) VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
1017 			cp->c_frontvp = NULL;
1018 		}
1019 		if (ffrele)
1020 			filegrp_ffrele(fgp);
1021 		if (freefile)
1022 			cachefs_freefile(fgp->fg_fscp->fs_cache);
1023 		if (rlfree) {
1024 #ifdef CFSDEBUG
1025 			cachefs_rlent_verify(fgp->fg_fscp->fs_cache,
1026 			    CACHEFS_RL_ACTIVE, cp->c_metadata.md_rlno);
1027 #endif /* CFSDEBUG */
1028 			cachefs_rlent_moveto(fgp->fg_fscp->fs_cache,
1029 			    CACHEFS_RL_FREE, cp->c_metadata.md_rlno, 0);
1030 			cp->c_metadata.md_rlno = 0;
1031 			cp->c_metadata.md_rltype = CACHEFS_RL_NONE;
1032 		}
1033 		cachefs_nocache(cp);
1034 	}
1035 	if (alloc)
1036 		cachefs_kmem_free(attrp, sizeof (struct vattr));
1037 #ifdef CFSDEBUG
1038 	CFS_DEBUG(CFSDEBUG_FRONT)
1039 		printf("c_createfrontfile: EXIT error = %d name %s\n", error,
1040 			name);
1041 #endif
1042 	return (error);
1043 }
1044 
1045 /*
1046  * Releases resources associated with the front file.
1047  * Only call this routine if a ffhold has been done.
1048  * Its okay to call this routine if the front file does not exist.
1049  * Note: this routine is used even if there is no front file.
1050  */
1051 void
1052 cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
1053     filegrp_t *fgp)
1054 {
1055 	int error, enoent;
1056 	char name[CFS_FRONTFILE_NAME_SIZE + 2];
1057 
1058 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
1059 
1060 	enoent = 0;
1061 	if (mdp->md_flags & MD_FILE) {
1062 		if (fgp->fg_dirvp == NULL) {
1063 			cmn_err(CE_WARN, "cachefs: remove error, run fsck\n");
1064 			return;
1065 		}
1066 		make_ascii_name(cidp, name);
1067 		error = VOP_REMOVE(fgp->fg_dirvp, name, kcred, NULL, 0);
1068 		if (error == ENOENT)
1069 			enoent = 1;
1070 		if ((error) && (error != ENOENT)) {
1071 			cmn_err(CE_WARN, "UFS remove error %s %d, run fsck\n",
1072 			    name, error);
1073 		}
1074 		if (mdp->md_flags & MD_ACLDIR) {
1075 			(void) strcat(name, ".d");
1076 			error = VOP_RMDIR(fgp->fg_dirvp, name, fgp->fg_dirvp,
1077 			    kcred, NULL, 0);
1078 			if ((error) && (error != ENOENT)) {
1079 				cmn_err(CE_WARN, "frontfs rmdir error %s %d"
1080 				    "; run fsck\n", name, error);
1081 			}
1082 		}
1083 		mdp->md_flags &= ~(MD_FILE | MD_POPULATED | MD_ACL | MD_ACLDIR);
1084 		bzero(&mdp->md_allocinfo, mdp->md_allocents *
1085 			sizeof (struct cachefs_allocmap));
1086 		cachefs_freefile(fgp->fg_fscp->fs_cache);
1087 	}
1088 
1089 	/*
1090 	 * Clear packed bit, fastsymlinks and special files
1091 	 * do not have a front file.
1092 	 */
1093 	mdp->md_flags &= ~MD_PACKED;
1094 
1095 	/* XXX either rename routine or move this to caller */
1096 	if (enoent == 0)
1097 		filegrp_ffrele(fgp);
1098 
1099 	if (mdp->md_frontblks) {
1100 		cachefs_freeblocks(fgp->fg_fscp->fs_cache, mdp->md_frontblks,
1101 		    mdp->md_rltype);
1102 		mdp->md_frontblks = 0;
1103 	}
1104 }
1105 
1106 /*
1107  * This is the interface to the rest of CFS. This takes a cnode, and returns
1108  * the frontvp (stuffs it in the cnode). This creates an attrcache slot and
1109  * and frontfile if necessary.
1110  */
1111 
1112 int
1113 cachefs_getfrontfile(cnode_t *cp)
1114 {
1115 	struct filegrp *fgp = cp->c_filegrp;
1116 	int error;
1117 	struct vattr va;
1118 
1119 #ifdef CFSDEBUG
1120 	CFS_DEBUG(CFSDEBUG_SUBR)
1121 		printf("c_getfrontfile: ENTER cp %p\n", (void *)cp);
1122 #endif
1123 
1124 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fgp->fg_fscp) == 0);
1125 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1126 
1127 	/*
1128 	 * Now we check to see if there is a front file for this entry.
1129 	 * If there is, we get the vnode for it and stick it in the cnode.
1130 	 * Otherwise, we create a front file, get the vnode for it and stick
1131 	 * it in the cnode.
1132 	 */
1133 	if (cp->c_flags & CN_STALE) {
1134 		cp->c_flags |= CN_NOCACHE;
1135 		error = ESTALE;
1136 		goto out;
1137 	}
1138 
1139 	/*
1140 	 * If the cnode is being populated, and we're not the populating
1141 	 * thread, then block until the pop thread completes.  If we are the
1142 	 * pop thread, then we may come in here, but not to nuke the directory
1143 	 * cnode at a critical juncture.  If we return from a cv_wait and the
1144 	 * cnode is now stale, don't bother trying to get the front file.
1145 	 */
1146 	while ((cp->c_flags & CN_ASYNC_POP_WORKING) &&
1147 	    (cp->c_popthrp != curthread)) {
1148 		cv_wait(&cp->c_popcv, &cp->c_statelock);
1149 		if (cp->c_flags & CN_STALE) {
1150 			cp->c_flags |= CN_NOCACHE;
1151 			error = ESTALE;
1152 			goto out;
1153 		}
1154 	}
1155 
1156 	if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
1157 #ifdef CFSDEBUG
1158 		if (cp->c_frontvp != NULL)
1159 			CFS_DEBUG(CFSDEBUG_FRONT)
1160 				printf(
1161 		"c_getfrontfile: !MD_FILE and frontvp not null cp %p\n",
1162 				    (void *)cp);
1163 #endif
1164 		if (CTOV(cp)->v_type == VDIR)
1165 			ASSERT((cp->c_metadata.md_flags & MD_POPULATED) == 0);
1166 		error = cachefs_createfrontfile(cp, fgp);
1167 		if (error)
1168 			goto out;
1169 	} else {
1170 		/*
1171 		 * A front file exists, all we need to do is to grab the fid,
1172 		 * do a VFS_VGET() on the fid, stuff the vnode in the cnode,
1173 		 * and return.
1174 		 */
1175 		if (fgp->fg_dirvp == NULL) {
1176 			cmn_err(CE_WARN, "cachefs: gff0: corrupted file system"
1177 				" run fsck\n");
1178 			cachefs_inval_object(cp);
1179 			cp->c_flags |= CN_NOCACHE;
1180 			error = ESTALE;
1181 			goto out;
1182 		}
1183 		error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
1184 				&cp->c_metadata.md_fid);
1185 		if (error || (cp->c_frontvp == NULL)) {
1186 #ifdef CFSDEBUG
1187 			CFS_DEBUG(CFSDEBUG_FRONT)
1188 				printf("cachefs: "
1189 				    "gff1: front file system error %d\n",
1190 				    error);
1191 #endif /* CFSDEBUG */
1192 			cachefs_inval_object(cp);
1193 			cp->c_flags |= CN_NOCACHE;
1194 			error = ESTALE;
1195 			goto out;
1196 		}
1197 
1198 		/* don't need to check timestamps if need_front_sync is set */
1199 		if (cp->c_flags & CN_NEED_FRONT_SYNC) {
1200 			error = 0;
1201 			goto out;
1202 		}
1203 
1204 		/* don't need to check empty directories */
1205 		if (CTOV(cp)->v_type == VDIR &&
1206 		    ((cp->c_metadata.md_flags & MD_POPULATED) == 0)) {
1207 			error = 0;
1208 			goto out;
1209 		}
1210 
1211 		/* get modify time of the front file */
1212 		va.va_mask = AT_MTIME;
1213 		error = VOP_GETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
1214 		if (error) {
1215 			cmn_err(CE_WARN, "cachefs: gff2: front file"
1216 				" system error %d", error);
1217 			cachefs_inval_object(cp);
1218 			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
1219 			goto out;
1220 		}
1221 
1222 		/* compare with modify time stored in metadata */
1223 		if (bcmp(&va.va_mtime, &cp->c_metadata.md_timestamp,
1224 		    sizeof (timestruc_t)) != 0) {
1225 #ifdef CFSDEBUG
1226 			CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_INVALIDATE) {
1227 				long sec, nsec;
1228 				sec = cp->c_metadata.md_timestamp.tv_sec;
1229 				nsec = cp->c_metadata.md_timestamp.tv_nsec;
1230 				printf("c_getfrontfile: timestamps don't"
1231 					" match fileno %lld va %lx %lx"
1232 					" meta %lx %lx\n",
1233 					(u_longlong_t)cp->c_id.cid_fileno,
1234 					va.va_mtime.tv_sec,
1235 					va.va_mtime.tv_nsec, sec, nsec);
1236 			}
1237 #endif
1238 			cachefs_inval_object(cp);
1239 			error = (cp->c_flags & CN_NOCACHE) ? ESTALE : 0;
1240 		}
1241 	}
1242 out:
1243 
1244 #ifdef CFSDEBUG
1245 	CFS_DEBUG(CFSDEBUG_FRONT)
1246 		printf("c_getfrontfile: EXIT error = %d\n", error);
1247 #endif
1248 	return (error);
1249 }
1250 
1251 void
1252 cachefs_inval_object(cnode_t *cp)
1253 {
1254 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
1255 	struct filegrp *fgp = cp->c_filegrp;
1256 	int error;
1257 
1258 	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
1259 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1260 	ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0 ||
1261 		cp->c_popthrp == curthread);
1262 #if 0
1263 	CFS_DEBUG(CFSDEBUG_SUBR)
1264 		printf("c_inval_object: ENTER cp %p\n", (void *)cp);
1265 	if (cp->c_flags & (CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING))
1266 		debug_enter("inval object during async pop");
1267 #endif
1268 	cp->c_flags |= CN_NOCACHE;
1269 
1270 	/* if we cannot modify the cache */
1271 	if (C_TO_FSCACHE(cp)->fs_cache->c_flags &
1272 	    (CACHE_NOFILL | CACHE_NOCACHE)) {
1273 		goto out;
1274 	}
1275 
1276 	/* if there is a front file */
1277 	if (cp->c_metadata.md_flags & MD_FILE) {
1278 		if (fgp->fg_dirvp == NULL)
1279 			goto out;
1280 
1281 		/* get the front file vp if necessary */
1282 		if (cp->c_frontvp == NULL) {
1283 
1284 			error = VFS_VGET(fgp->fg_dirvp->v_vfsp, &cp->c_frontvp,
1285 				&cp->c_metadata.md_fid);
1286 			if (error || (cp->c_frontvp == NULL)) {
1287 #ifdef CFSDEBUG
1288 				CFS_DEBUG(CFSDEBUG_FRONT)
1289 					printf("cachefs: "
1290 					    "io: front file error %d\n", error);
1291 #endif /* CFSDEBUG */
1292 				goto out;
1293 			}
1294 		}
1295 
1296 		/* truncate the file to zero size */
1297 		error = cachefs_frontfile_size(cp, 0);
1298 		if (error)
1299 			goto out;
1300 		cp->c_flags &= ~CN_NOCACHE;
1301 
1302 		/* if a directory, v_type is zero if called from initcnode */
1303 		if (cp->c_attr.va_type == VDIR) {
1304 			if (cp->c_usage < CFS_DIRCACHE_COST) {
1305 				cp->c_invals++;
1306 				if (cp->c_invals > CFS_DIRCACHE_INVAL) {
1307 					cp->c_invals = 0;
1308 				}
1309 			} else
1310 				cp->c_invals = 0;
1311 			cp->c_usage = 0;
1312 		}
1313 	} else {
1314 		cp->c_flags &= ~CN_NOCACHE;
1315 	}
1316 
1317 out:
1318 	if ((cp->c_metadata.md_flags & MD_PACKED) &&
1319 	    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
1320 	    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
1321 		ASSERT(cp->c_metadata.md_rlno != 0);
1322 		if (cp->c_metadata.md_rltype != CACHEFS_RL_PACKED_PENDING) {
1323 			cachefs_rlent_moveto(cachep,
1324 			    CACHEFS_RL_PACKED_PENDING,
1325 			    cp->c_metadata.md_rlno,
1326 			    cp->c_metadata.md_frontblks);
1327 			cp->c_metadata.md_rltype = CACHEFS_RL_PACKED_PENDING;
1328 			/* unconditionally set CN_UPDATED below */
1329 		}
1330 	}
1331 
1332 	cachefs_purgeacl(cp);
1333 
1334 	if (cp->c_flags & CN_ASYNC_POP_WORKING)
1335 		cp->c_flags |= CN_NOCACHE;
1336 	cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
1337 	    MD_FASTSYMLNK);
1338 	cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1339 	cp->c_flags |= CN_UPDATED;
1340 
1341 	/*
1342 	 * If the object invalidated is a directory, the dnlc should be purged
1343 	 * to elide all references to this (directory) vnode.
1344 	 */
1345 	if (CTOV(cp)->v_type == VDIR)
1346 		dnlc_purge_vp(CTOV(cp));
1347 
1348 #ifdef CFSDEBUG
1349 	CFS_DEBUG(CFSDEBUG_SUBR)
1350 		printf("c_inval_object: EXIT\n");
1351 #endif
1352 }
1353 
1354 void
1355 make_ascii_name(cfs_cid_t *cidp, char *strp)
1356 {
1357 	int i = sizeof (uint_t) * 4;
1358 	u_longlong_t index;
1359 	ino64_t name;
1360 
1361 	if (cidp->cid_flags & CFS_CID_LOCAL)
1362 		*strp++ = 'L';
1363 	name = (ino64_t)cidp->cid_fileno;
1364 	do {
1365 		index = (((u_longlong_t)name) & 0xf000000000000000) >> 60;
1366 		index &= (u_longlong_t)0xf;
1367 		ASSERT(index < (u_longlong_t)16);
1368 		*strp++ = "0123456789abcdef"[index];
1369 		name <<= 4;
1370 	} while (--i);
1371 	*strp = '\0';
1372 }
1373 
1374 void
1375 cachefs_nocache(cnode_t *cp)
1376 {
1377 	fscache_t *fscp = C_TO_FSCACHE(cp);
1378 	cachefscache_t *cachep = fscp->fs_cache;
1379 
1380 #ifdef CFSDEBUG
1381 	CFS_DEBUG(CFSDEBUG_SUBR)
1382 		printf("c_nocache: ENTER cp %p\n", (void *)cp);
1383 #endif
1384 
1385 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1386 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1387 	if ((cp->c_flags & CN_NOCACHE) == 0) {
1388 #ifdef CFSDEBUG
1389 		CFS_DEBUG(CFSDEBUG_INVALIDATE)
1390 			printf("cachefs_nocache: invalidating %llu\n",
1391 			    (u_longlong_t)cp->c_id.cid_fileno);
1392 #endif
1393 		/*
1394 		 * Here we are waiting until inactive time to do
1395 		 * the inval_object.  In case we don't get to inactive
1396 		 * (because of a crash, say) we set up a timestamp mismatch
1397 		 * such that getfrontfile will blow the front file away
1398 		 * next time we try to use it.
1399 		 */
1400 		cp->c_metadata.md_timestamp.tv_sec = 0;
1401 		cp->c_metadata.md_timestamp.tv_nsec = 0;
1402 		cp->c_metadata.md_flags &= ~(MD_POPULATED | MD_INVALREADDIR |
1403 		    MD_FASTSYMLNK);
1404 		cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1405 
1406 		cachefs_purgeacl(cp);
1407 
1408 		/*
1409 		 * It is possible we can nocache while disconnected.
1410 		 * A directory could be nocached by running out of space.
1411 		 * A regular file should only be nocached if an I/O error
1412 		 * occurs to the front fs.
1413 		 * We count on the item staying on the modified list
1414 		 * so we do not loose the cid to fid mapping for directories.
1415 		 */
1416 
1417 		if ((cp->c_metadata.md_flags & MD_PACKED) &&
1418 		    (cp->c_metadata.md_rltype != CACHEFS_RL_MODIFIED) &&
1419 		    ((cachep->c_flags & CACHE_NOFILL) == 0)) {
1420 			ASSERT(cp->c_metadata.md_rlno != 0);
1421 			if (cp->c_metadata.md_rltype !=
1422 			    CACHEFS_RL_PACKED_PENDING) {
1423 				cachefs_rlent_moveto(cachep,
1424 				    CACHEFS_RL_PACKED_PENDING,
1425 				    cp->c_metadata.md_rlno,
1426 				    cp->c_metadata.md_frontblks);
1427 				cp->c_metadata.md_rltype =
1428 				    CACHEFS_RL_PACKED_PENDING;
1429 				/* unconditionally set CN_UPDATED below */
1430 			}
1431 		}
1432 
1433 		if (CTOV(cp)->v_type == VDIR)
1434 			dnlc_purge_vp(CTOV(cp));
1435 		cp->c_flags |= (CN_NOCACHE | CN_UPDATED);
1436 	}
1437 
1438 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_NOCACHE))
1439 		cachefs_log_nocache(cachep, 0, fscp->fs_cfsvfsp,
1440 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno);
1441 
1442 #ifdef CFSDEBUG
1443 	CFS_DEBUG(CFSDEBUG_SUBR)
1444 		printf("c_nocache: EXIT cp %p\n", (void *)cp);
1445 #endif
1446 }
1447 
1448 /*
1449  * Checks to see if the page is in the disk cache, by checking the allocmap.
1450  */
1451 int
1452 cachefs_check_allocmap(cnode_t *cp, u_offset_t off)
1453 {
1454 	int i;
1455 	size_t dbl_size_to_look = cp->c_attr.va_size - off;
1456 	uint_t	size_to_look;
1457 
1458 	if (dbl_size_to_look > (u_offset_t)PAGESIZE)
1459 		size_to_look = (uint_t)PAGESIZE;
1460 	else
1461 		/*LINTED alignment okay*/
1462 		size_to_look = (uint_t)dbl_size_to_look;
1463 
1464 	for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1465 		struct cachefs_allocmap *allocp =
1466 				cp->c_metadata.md_allocinfo + i;
1467 
1468 		if (off >= allocp->am_start_off) {
1469 			if ((off + size_to_look) <=
1470 			    (allocp->am_start_off + allocp->am_size)) {
1471 				struct fscache *fscp = C_TO_FSCACHE(cp);
1472 				cachefscache_t *cachep = fscp->fs_cache;
1473 
1474 				if (CACHEFS_LOG_LOGGING(cachep,
1475 				    CACHEFS_LOG_CALLOC))
1476 					cachefs_log_calloc(cachep, 0,
1477 					    fscp->fs_cfsvfsp,
1478 					    &cp->c_metadata.md_cookie,
1479 					    cp->c_id.cid_fileno,
1480 					    off, size_to_look);
1481 			/*
1482 			 * Found the page in the CFS disk cache.
1483 			 */
1484 				return (1);
1485 			}
1486 		} else {
1487 			return (0);
1488 		}
1489 	}
1490 	return (0);
1491 }
1492 
1493 /*
1494  * Merges adjacent allocmap entries together where possible, e.g.
1495  *   offset=0x0,     size=0x40000
1496  *   offset=0x40000, size=0x20000	becomes just offset=0x0, size-0x90000
1497  *   offset=0x60000, size=0x30000
1498  */
1499 
1500 
1501 void
1502 cachefs_coalesce_allocmap(struct cachefs_metadata *cmd)
1503 {
1504 	int i, reduced = 0;
1505 	struct cachefs_allocmap *allocp, *nallocp;
1506 
1507 	nallocp = allocp = cmd->md_allocinfo;
1508 	allocp++;
1509 	for (i = 1; i < cmd->md_allocents; i++, allocp++) {
1510 		if (nallocp->am_start_off + nallocp->am_size ==
1511 						allocp->am_start_off) {
1512 			nallocp->am_size += allocp->am_size;
1513 			reduced++;
1514 		} else {
1515 			nallocp++;
1516 			nallocp->am_start_off = allocp->am_start_off;
1517 			nallocp->am_size = allocp->am_size;
1518 		}
1519 	}
1520 	cmd->md_allocents -= reduced;
1521 }
1522 
1523 /*
1524  * Updates the allocmap to reflect a new chunk of data that has been
1525  * populated.
1526  */
1527 void
1528 cachefs_update_allocmap(cnode_t *cp, u_offset_t off, size_t size)
1529 {
1530 	int i;
1531 	struct cachefs_allocmap *allocp;
1532 	struct fscache *fscp =  C_TO_FSCACHE(cp);
1533 	cachefscache_t *cachep = fscp->fs_cache;
1534 	u_offset_t saveoff;
1535 	u_offset_t savesize;
1536 	u_offset_t logoff = off;
1537 	size_t logsize = size;
1538 	u_offset_t endoff;
1539 	u_offset_t tmpendoff;
1540 
1541 	/*
1542 	 * We try to see if we can coalesce the current block into an existing
1543 	 * allocation and mark it as such.
1544 	 * If we can't do that then we make a new entry in the allocmap.
1545 	 * when we run out of allocmaps, put the cnode in NOCACHE mode.
1546 	 */
1547 again:
1548 	allocp = cp->c_metadata.md_allocinfo;
1549 	for (i = 0; i < cp->c_metadata.md_allocents; i++, allocp++) {
1550 
1551 		if (off <= (allocp->am_start_off)) {
1552 			endoff = off + size;
1553 			if (endoff >= allocp->am_start_off) {
1554 				tmpendoff = allocp->am_start_off +
1555 						allocp->am_size;
1556 				if (endoff < tmpendoff)
1557 					endoff = tmpendoff;
1558 				allocp->am_size = endoff - off;
1559 				allocp->am_start_off = off;
1560 				cachefs_coalesce_allocmap(&cp->c_metadata);
1561 				allocp = cp->c_metadata.md_allocinfo;
1562 				if (allocp->am_size >= cp->c_size)
1563 					cp->c_metadata.md_flags |= MD_POPULATED;
1564 				return;
1565 			} else {
1566 				saveoff = off;
1567 				savesize = size;
1568 				off = allocp->am_start_off;
1569 				size = allocp->am_size;
1570 				allocp->am_size = savesize;
1571 				allocp->am_start_off = saveoff;
1572 				goto again;
1573 			}
1574 		} else {
1575 			endoff = allocp->am_start_off + allocp->am_size;
1576 			if (off < endoff) {
1577 				tmpendoff = off + size;
1578 				if (endoff < tmpendoff)
1579 					endoff = tmpendoff;
1580 				allocp->am_size = endoff - allocp->am_start_off;
1581 				cachefs_coalesce_allocmap(&cp->c_metadata);
1582 				allocp = cp->c_metadata.md_allocinfo;
1583 				if (allocp->am_size >= cp->c_size)
1584 					cp->c_metadata.md_flags |= MD_POPULATED;
1585 				return;
1586 			}
1587 			if (off == (allocp->am_start_off + allocp->am_size)) {
1588 				allocp->am_size += size;
1589 				cachefs_coalesce_allocmap(&cp->c_metadata);
1590 				allocp = cp->c_metadata.md_allocinfo;
1591 				if (allocp->am_size >= cp->c_size)
1592 					cp->c_metadata.md_flags |= MD_POPULATED;
1593 				return;
1594 			}
1595 		}
1596 	}
1597 	if (i == C_MAX_ALLOCINFO_SLOTS) {
1598 #ifdef CFSDEBUG
1599 		CFS_DEBUG(CFSDEBUG_ALLOCMAP)
1600 			printf("c_update_alloc_map: "
1601 			    "Too many allinfo entries cp %p fileno %llu %p\n",
1602 			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
1603 			    (void *)cp->c_metadata.md_allocinfo);
1604 #endif
1605 		cachefs_nocache(cp);
1606 		return;
1607 	}
1608 	allocp->am_start_off = off;
1609 	allocp->am_size = (u_offset_t)size;
1610 	if (allocp->am_size >= cp->c_size)
1611 		cp->c_metadata.md_flags |= MD_POPULATED;
1612 	cp->c_metadata.md_allocents++;
1613 
1614 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_UALLOC))
1615 		cachefs_log_ualloc(cachep, 0, fscp->fs_cfsvfsp,
1616 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno,
1617 		    logoff, logsize);
1618 }
1619 
1620 /*
1621  * CFS population function
1622  *
1623  * before async population, this function used to turn on the cnode
1624  * flags CN_UPDATED, CN_NEED_FRONT_SYNC, and CN_POPULATION_PENDING.
1625  * now, however, it's the responsibility of the caller to do this if
1626  * this function returns 0 (no error).
1627  */
1628 
1629 int
1630 cachefs_populate(cnode_t *cp, u_offset_t off, size_t popsize, vnode_t *frontvp,
1631     vnode_t *backvp, u_offset_t cpsize, cred_t *cr)
1632 {
1633 	int error = 0;
1634 	caddr_t addr;
1635 	u_offset_t upto;
1636 	uint_t size;
1637 	u_offset_t from = off;
1638 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
1639 	ssize_t resid;
1640 	struct fbuf *fbp;
1641 	caddr_t buf = kmem_alloc(MAXBSIZE, KM_SLEEP);
1642 
1643 #ifdef CFSDEBUG
1644 	CFS_DEBUG(CFSDEBUG_VOPS)
1645 		printf("cachefs_populate: ENTER cp %p off %lld\n",
1646 		    (void *)cp, off);
1647 #endif
1648 
1649 	upto = MIN((off + popsize), cpsize);
1650 
1651 	while (from < upto) {
1652 		u_offset_t blkoff = (from & (offset_t)MAXBMASK);
1653 		uint_t n = from - blkoff;
1654 
1655 		size = upto - from;
1656 		if (upto > (blkoff + MAXBSIZE))
1657 			size = MAXBSIZE - n;
1658 
1659 		error = fbread(backvp, (offset_t)blkoff, n + size,
1660 			S_OTHER, &fbp);
1661 		if (CFS_TIMEOUT(C_TO_FSCACHE(cp), error))
1662 			goto out;
1663 		else if (error) {
1664 #ifdef CFSDEBUG
1665 			CFS_DEBUG(CFSDEBUG_BACK)
1666 				printf("cachefs_populate: fbread error %d\n",
1667 				    error);
1668 #endif
1669 			goto out;
1670 		}
1671 
1672 		addr = fbp->fb_addr;
1673 		ASSERT(addr != NULL);
1674 		ASSERT(n + size <= MAXBSIZE);
1675 		bcopy(addr, buf, n + size);
1676 		fbrelse(fbp, S_OTHER);
1677 
1678 		if (n == 0 || cachefs_check_allocmap(cp, blkoff) == 0) {
1679 			if (error = cachefs_allocblocks(cachep, 1,
1680 			    cp->c_metadata.md_rltype))
1681 				goto out;
1682 			cp->c_metadata.md_frontblks++;
1683 		}
1684 		resid = 0;
1685 		error = vn_rdwr(UIO_WRITE, frontvp, buf + n, size,
1686 				(offset_t)from, UIO_SYSSPACE, 0,
1687 				(rlim64_t)RLIM64_INFINITY, cr, &resid);
1688 		if (error) {
1689 #ifdef CFSDEBUG
1690 			CFS_DEBUG(CFSDEBUG_FRONT)
1691 				printf("cachefs_populate: "
1692 				    "Got error = %d from vn_rdwr\n", error);
1693 #endif
1694 			goto out;
1695 		}
1696 #ifdef CFSDEBUG
1697 		if (resid)
1698 			CFS_DEBUG(CFSDEBUG_FRONT)
1699 				printf("cachefs_populate: non-zero resid %ld\n",
1700 				    resid);
1701 #endif
1702 		from += size;
1703 	}
1704 	(void) cachefs_update_allocmap(cp, off, upto - off);
1705 out:
1706 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_POPULATE))
1707 		cachefs_log_populate(cachep, error,
1708 		    C_TO_FSCACHE(cp)->fs_cfsvfsp,
1709 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, off,
1710 		    popsize);
1711 
1712 #ifdef CFSDEBUG
1713 	CFS_DEBUG(CFSDEBUG_VOPS)
1714 		printf("cachefs_populate: EXIT cp %p error %d\n",
1715 		    (void *)cp, error);
1716 #endif
1717 	kmem_free(buf, MAXBSIZE);
1718 
1719 	return (error);
1720 }
1721 
1722 /*
1723  * due to compiler error we shifted cnode to the last argument slot.
1724  * occurred during large files project - XXX.
1725  */
1726 void
1727 cachefs_cluster_allocmap(u_offset_t off, u_offset_t *popoffp,
1728     size_t *popsizep, size_t size, struct cnode *cp)
1729 {
1730 	int i;
1731 	u_offset_t lastoff = 0;
1732 	u_offset_t forward_diff = 0;
1733 	u_offset_t backward_diff = 0;
1734 
1735 	ASSERT(size <= C_TO_FSCACHE(cp)->fs_info.fi_popsize);
1736 
1737 #ifdef CFSDEBUG
1738 	CFS_DEBUG(CFSDEBUG_SUBR)
1739 		printf("cachefs_cluster_allocmap: off %llx, size %llx, "
1740 			"c_size %llx\n", off, size, (longlong_t)cp->c_size);
1741 #endif /* CFSDEBUG */
1742 	for (i = 0; i < cp->c_metadata.md_allocents; i++) {
1743 		struct cachefs_allocmap *allocp =
1744 			cp->c_metadata.md_allocinfo + i;
1745 
1746 		if (allocp->am_start_off > off) {
1747 			if ((off + size) > allocp->am_start_off) {
1748 				forward_diff = allocp->am_start_off - off;
1749 				backward_diff = size - forward_diff;
1750 				if (backward_diff > off)
1751 					backward_diff = off;
1752 				if (lastoff > (off - backward_diff))
1753 					backward_diff = off - lastoff;
1754 			} else {
1755 				forward_diff = size;
1756 			}
1757 			*popoffp = (off - backward_diff) & (offset_t)PAGEMASK;
1758 			*popsizep = ((off + forward_diff) - *popoffp) &
1759 				(offset_t)PAGEMASK;
1760 			return;
1761 		} else {
1762 			lastoff = allocp->am_start_off + allocp->am_size;
1763 		}
1764 	}
1765 	if ((lastoff + size) > off) {
1766 		*popoffp = (lastoff & (offset_t)PAGEMASK);
1767 	} else {
1768 		 *popoffp = off & (offset_t)PAGEMASK;
1769 	}
1770 
1771 	/*
1772 	 * 64bit project: popsize is the chunk size used to populate the
1773 	 * cache (default 64K). As such, 32 bit should suffice.
1774 	 */
1775 	if ((*popoffp + size) > cp->c_size)
1776 		*popsizep = (cp->c_size - *popoffp + PAGEOFFSET) &
1777 			(offset_t)PAGEMASK;
1778 	else if (size < PAGESIZE)
1779 		*popsizep = (size + PAGEOFFSET) &
1780 			(offset_t)PAGEMASK;
1781 	else
1782 		*popsizep = size & (offset_t)PAGEMASK;
1783 
1784 #ifdef CFSDEBUG
1785 	CFS_DEBUG(CFSDEBUG_SUBR)
1786 		printf("cachefs_cluster_allocmap: popoff %llx, popsize %llx\n",
1787 			(u_longlong_t)(*popoffp), (u_longlong_t)(*popsizep));
1788 #endif /* CFSDEBUG */
1789 }
1790 
1791 /*
1792  * "populate" a symlink in the cache
1793  */
1794 int
1795 cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen)
1796 {
1797 	int error = 0;
1798 	struct fscache *fscp = C_TO_FSCACHE(cp);
1799 	cachefscache_t *cachep = fscp->fs_cache;
1800 	struct cachefs_metadata *mdp = &cp->c_metadata;
1801 
1802 	ASSERT(RW_WRITE_HELD(&cp->c_rwlock));
1803 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1804 
1805 	if (CFS_ISFS_BACKFS_NFSV4(fscp))
1806 		goto out;
1807 
1808 	if (cp->c_flags & CN_NOCACHE)
1809 		return (ENOENT);
1810 
1811 	cp->c_size = (u_offset_t)buflen;
1812 
1813 	/* if can create a fast sym link */
1814 	if (buflen <= C_FSL_SIZE) {
1815 		/* give up the front file resources */
1816 		if (mdp->md_rlno) {
1817 			cachefs_removefrontfile(mdp, &cp->c_id, cp->c_filegrp);
1818 			cachefs_rlent_moveto(cachep, CACHEFS_RL_FREE,
1819 			    mdp->md_rlno, 0);
1820 			mdp->md_rlno = 0;
1821 			mdp->md_rltype = CACHEFS_RL_NONE;
1822 		}
1823 		/* put sym link contents in allocinfo in metadata */
1824 		bzero(mdp->md_allocinfo, C_FSL_SIZE);
1825 		bcopy(buf, mdp->md_allocinfo, buflen);
1826 
1827 		mdp->md_flags |= MD_FASTSYMLNK;
1828 		cp->c_flags &= ~CN_NEED_FRONT_SYNC;
1829 		cp->c_flags |= CN_UPDATED;
1830 		goto out;
1831 	}
1832 
1833 	/* else create a sym link in a front file */
1834 	if (cp->c_frontvp == NULL)
1835 		error = cachefs_getfrontfile(cp);
1836 	if (error)
1837 		goto out;
1838 
1839 	/* truncate front file */
1840 	error = cachefs_frontfile_size(cp, 0);
1841 	mdp->md_flags &= ~(MD_FASTSYMLNK | MD_POPULATED);
1842 	if (error)
1843 		goto out;
1844 
1845 	/* get space for the sym link */
1846 	error = cachefs_allocblocks(cachep, 1, cp->c_metadata.md_rltype);
1847 	if (error)
1848 		goto out;
1849 
1850 	/* write the sym link to the front file */
1851 	error = vn_rdwr(UIO_WRITE, cp->c_frontvp, buf, buflen, 0,
1852 	    UIO_SYSSPACE, 0, RLIM_INFINITY, kcred, NULL);
1853 	if (error) {
1854 		cachefs_freeblocks(cachep, 1, cp->c_metadata.md_rltype);
1855 		goto out;
1856 	}
1857 
1858 	cp->c_metadata.md_flags |= MD_POPULATED;
1859 	cp->c_flags |= CN_NEED_FRONT_SYNC;
1860 	cp->c_flags |= CN_UPDATED;
1861 
1862 out:
1863 	if (CACHEFS_LOG_LOGGING(cachep, CACHEFS_LOG_CSYMLINK))
1864 		cachefs_log_csymlink(cachep, error, fscp->fs_cfsvfsp,
1865 		    &cp->c_metadata.md_cookie, cp->c_id.cid_fileno, buflen);
1866 
1867 	return (error);
1868 }
1869 
1870 /*
1871  * Reads the full contents of the symbolic link from the back file system.
1872  * *bufp is set to a MAXPATHLEN buffer that must be freed when done
1873  * *buflenp is the length of the link
1874  */
1875 int
1876 cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp)
1877 {
1878 	int error;
1879 	struct uio uio;
1880 	struct iovec iov;
1881 	caddr_t buf;
1882 	fscache_t *fscp = C_TO_FSCACHE(cp);
1883 
1884 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1885 
1886 	*bufp = NULL;
1887 
1888 	/* get back vnode */
1889 	if (cp->c_backvp == NULL) {
1890 		error = cachefs_getbackvp(fscp, cp);
1891 		if (error)
1892 			return (error);
1893 	}
1894 
1895 	/* set up for the readlink */
1896 	bzero(&uio, sizeof (struct uio));
1897 	bzero(&iov, sizeof (struct iovec));
1898 	buf = cachefs_kmem_alloc(MAXPATHLEN, KM_SLEEP);
1899 	iov.iov_base = buf;
1900 	iov.iov_len = MAXPATHLEN;
1901 	uio.uio_iov = &iov;
1902 	uio.uio_iovcnt = 1;
1903 	uio.uio_resid = MAXPATHLEN;
1904 	uio.uio_segflg = UIO_SYSSPACE;
1905 	uio.uio_loffset = 0;
1906 	uio.uio_fmode = 0;
1907 	uio.uio_extflg = UIO_COPY_CACHED;
1908 	uio.uio_llimit = MAXOFFSET_T;
1909 
1910 	/* get the link data */
1911 	CFS_DPRINT_BACKFS_NFSV4(fscp,
1912 		("cachefs_readlink (nfsv4): cnode %p, backvp %p\n",
1913 		cp, cp->c_backvp));
1914 	error = VOP_READLINK(cp->c_backvp, &uio, cr, NULL);
1915 	if (error) {
1916 		cachefs_kmem_free(buf, MAXPATHLEN);
1917 	} else {
1918 		*bufp = buf;
1919 		/*LINTED alignment okay*/
1920 		*buflenp = MAXPATHLEN - (int)uio.uio_resid;
1921 	}
1922 
1923 	return (error);
1924 }
1925 
1926 int
1927 cachefs_getbackvp(struct fscache *fscp, struct cnode *cp)
1928 {
1929 	int error = 0;
1930 	int flag;
1931 
1932 #ifdef CFSDEBUG
1933 	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
1934 		printf("cachefs_getbackvp: ENTER fscp %p cp %p\n",
1935 		    (void *)fscp, (void *)cp);
1936 #endif
1937 	ASSERT(cp != NULL);
1938 	ASSERT(MUTEX_HELD(&cp->c_statelock));
1939 	ASSERT(cp->c_backvp == NULL);
1940 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1941 
1942 	/*
1943 	 * If destroy is set then the last link to a file has been
1944 	 * removed.  Oddly enough NFS will still return a vnode
1945 	 * for the file if the timeout has not expired.
1946 	 * This causes headaches for cachefs_push because the
1947 	 * vnode is really stale.
1948 	 * So we just short circuit the problem here.
1949 	 */
1950 	if (cp->c_flags & CN_DESTROY)
1951 		return (ESTALE);
1952 
1953 	ASSERT(fscp->fs_backvfsp);
1954 	if (fscp->fs_backvfsp == NULL)
1955 		return (ETIMEDOUT);
1956 	error = VFS_VGET(fscp->fs_backvfsp, &cp->c_backvp,
1957 	    (struct fid *)&cp->c_cookie);
1958 	if (cp->c_backvp && cp->c_cred &&
1959 	    ((cp->c_flags & CN_NEEDOPEN) || (cp->c_attr.va_type == VREG))) {
1960 		/*
1961 		 * XXX bob: really should pass in the correct flag,
1962 		 * fortunately nobody pays attention to it
1963 		 */
1964 		flag = 0;
1965 		/*
1966 		 * If NEEDOOPEN is set, then this file was opened VOP_OPEN'd
1967 		 * but the backvp was not.  So, for the sake of the vnode
1968 		 * open counts used by delegation, we need to OPEN the backvp
1969 		 * with the same flags that were used for this cnode.  That way
1970 		 * when the file is VOP_CLOSE'd the counts won't go negative.
1971 		 */
1972 		if (cp->c_flags & CN_NEEDOPEN) {
1973 			cp->c_flags &= ~CN_NEEDOPEN;
1974 			if (cp->c_rdcnt > 0) {
1975 				cp->c_rdcnt--;
1976 				flag |= FREAD;
1977 			}
1978 			if (cp->c_wrcnt > 0) {
1979 				cp->c_wrcnt--;
1980 				flag |= FWRITE;
1981 			}
1982 		}
1983 		error = VOP_OPEN(&cp->c_backvp, flag, cp->c_cred, NULL);
1984 		if (error) {
1985 			VN_RELE(cp->c_backvp);
1986 			cp->c_backvp = NULL;
1987 		}
1988 	}
1989 
1990 #ifdef CFSDEBUG
1991 	CFS_DEBUG(CFSDEBUG_GENERAL | CFSDEBUG_BACK) {
1992 		if (error || cp->c_backvp == NULL) {
1993 			printf("Stale cookie cp %p fileno %llu type %d \n",
1994 			    (void *)cp, (u_longlong_t)cp->c_id.cid_fileno,
1995 			    CTOV(cp)->v_type);
1996 		}
1997 	}
1998 #endif
1999 
2000 #ifdef CFSDEBUG
2001 	CFS_DEBUG(CFSDEBUG_CHEAT | CFSDEBUG_BACK)
2002 		printf("cachefs_getbackvp: EXIT error = %d\n", error);
2003 #endif
2004 	return (error);
2005 }
2006 
2007 int
2008 cachefs_getcookie(
2009 	vnode_t *vp,
2010 	struct fid *cookiep,
2011 	struct vattr *attrp,
2012 	cred_t *cr,
2013 	uint32_t valid_fid)
2014 {
2015 	int error = 0;
2016 
2017 #ifdef CFSDEBUG
2018 	CFS_DEBUG(CFSDEBUG_CHEAT)
2019 		printf("cachefs_getcookie: ENTER vp %p\n", (void *)vp);
2020 #endif
2021 	/*
2022 	 * Get the FID only if the caller has indicated it is valid,
2023 	 * otherwise, zero the cookie.
2024 	 */
2025 	if (valid_fid) {
2026 		/*
2027 		 * This assumes that the cookie is a full size fid, if we go to
2028 		 * variable length fids we will need to change this.
2029 		 */
2030 		cookiep->fid_len = MAXFIDSZ;
2031 		error = VOP_FID(vp, cookiep, NULL);
2032 	} else {
2033 		bzero(cookiep, sizeof (*cookiep));
2034 	}
2035 
2036 	if (!error) {
2037 		if (attrp) {
2038 			ASSERT(attrp != NULL);
2039 			attrp->va_mask = AT_ALL;
2040 			error = VOP_GETATTR(vp, attrp, 0, cr, NULL);
2041 		}
2042 	} else {
2043 		if (error == ENOSPC) {
2044 			/*
2045 			 * This is an indication that the underlying filesystem
2046 			 * needs a bigger fid.  For now just map to EINVAL.
2047 			 */
2048 			error = EINVAL;
2049 		}
2050 	}
2051 #ifdef CFSDEBUG
2052 	CFS_DEBUG(CFSDEBUG_CHEAT)
2053 		printf("cachefs_getcookie: EXIT error = %d\n", error);
2054 #endif
2055 	return (error);
2056 }
2057 
2058 void
2059 cachefs_workq_init(struct cachefs_workq *qp)
2060 {
2061 	qp->wq_head = qp->wq_tail = NULL;
2062 	qp->wq_length =
2063 	    qp->wq_thread_count =
2064 	    qp->wq_max_len =
2065 	    qp->wq_halt_request = 0;
2066 	qp->wq_keepone = 0;
2067 	cv_init(&qp->wq_req_cv, NULL, CV_DEFAULT, NULL);
2068 	cv_init(&qp->wq_halt_cv, NULL, CV_DEFAULT, NULL);
2069 	mutex_init(&qp->wq_queue_lock, NULL, MUTEX_DEFAULT, NULL);
2070 }
2071 
2072 /*
2073  * return non-zero if it's `okay' to queue more requests (policy)
2074  */
2075 
2076 static int cachefs_async_max = 512;
2077 static int cachefs_async_count = 0;
2078 kmutex_t cachefs_async_lock;
2079 
2080 int
2081 cachefs_async_okay(void)
2082 {
2083 	/*
2084 	 * a value of -1 for max means to ignore freemem
2085 	 */
2086 
2087 	if (cachefs_async_max == -1)
2088 		return (1);
2089 
2090 	if (freemem < minfree)
2091 		return (0);
2092 
2093 	/*
2094 	 * a value of 0 for max means no arbitrary limit (only `freemen')
2095 	 */
2096 
2097 	if (cachefs_async_max == 0)
2098 		return (1);
2099 
2100 	ASSERT(cachefs_async_max > 0);
2101 
2102 	/*
2103 	 * check the global count against the max.
2104 	 *
2105 	 * we don't need to grab cachefs_async_lock -- we're just
2106 	 * looking, and a little bit of `fuzz' is okay.
2107 	 */
2108 
2109 	if (cachefs_async_count >= cachefs_async_max)
2110 		return (0);
2111 
2112 	return (1);
2113 }
2114 
2115 void
2116 cachefs_async_start(struct cachefs_workq *qp)
2117 {
2118 	struct cachefs_req *rp;
2119 	int left;
2120 	callb_cpr_t cprinfo;
2121 
2122 	CALLB_CPR_INIT(&cprinfo, &qp->wq_queue_lock, callb_generic_cpr, "cas");
2123 	mutex_enter(&qp->wq_queue_lock);
2124 	left = 1;
2125 	for (;;) {
2126 		/* if there are no pending requests */
2127 		if ((qp->wq_head == NULL) && (qp->wq_logwork == 0)) {
2128 			/* see if thread should exit */
2129 			if (qp->wq_halt_request || (left == -1)) {
2130 				if ((qp->wq_thread_count > 1) ||
2131 				    (qp->wq_keepone == 0))
2132 					break;
2133 			}
2134 
2135 			/* wake up thread in async_halt if necessary */
2136 			if (qp->wq_halt_request)
2137 				cv_broadcast(&qp->wq_halt_cv);
2138 
2139 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2140 			/* sleep until there is something to do */
2141 			left = cv_timedwait(&qp->wq_req_cv,
2142 				&qp->wq_queue_lock, CFS_ASYNC_TIMEOUT + lbolt);
2143 			CALLB_CPR_SAFE_END(&cprinfo,
2144 				&qp->wq_queue_lock);
2145 			if ((qp->wq_head == NULL) && (qp->wq_logwork == 0))
2146 				continue;
2147 		}
2148 		left = 1;
2149 
2150 		if (qp->wq_logwork) {
2151 			qp->wq_logwork = 0;
2152 			mutex_exit(&qp->wq_queue_lock);
2153 			cachefs_log_process_queue(qp->wq_cachep, 1);
2154 			mutex_enter(&qp->wq_queue_lock);
2155 			continue;
2156 		}
2157 
2158 		/* remove request from the list */
2159 		rp = qp->wq_head;
2160 		qp->wq_head = rp->cfs_next;
2161 		if (rp->cfs_next == NULL)
2162 			qp->wq_tail = NULL;
2163 
2164 		/* do the request */
2165 		mutex_exit(&qp->wq_queue_lock);
2166 		cachefs_do_req(rp);
2167 		mutex_enter(&qp->wq_queue_lock);
2168 
2169 		/* decrement count of requests */
2170 		qp->wq_length--;
2171 		mutex_enter(&cachefs_async_lock);
2172 		--cachefs_async_count;
2173 		mutex_exit(&cachefs_async_lock);
2174 	}
2175 	ASSERT(qp->wq_head == NULL);
2176 	qp->wq_thread_count--;
2177 	if (qp->wq_halt_request && qp->wq_thread_count == 0)
2178 		cv_broadcast(&qp->wq_halt_cv);
2179 	CALLB_CPR_EXIT(&cprinfo);
2180 	thread_exit();
2181 	/*NOTREACHED*/
2182 }
2183 
2184 /*
2185  * attempt to halt all the async threads associated with a given workq
2186  */
2187 int
2188 cachefs_async_halt(struct cachefs_workq *qp, int force)
2189 {
2190 	int error = 0;
2191 	clock_t tend;
2192 
2193 	mutex_enter(&qp->wq_queue_lock);
2194 	if (force)
2195 		qp->wq_keepone = 0;
2196 
2197 	if (qp->wq_thread_count > 0) {
2198 		qp->wq_halt_request++;
2199 		cv_broadcast(&qp->wq_req_cv);
2200 		tend = lbolt + (60 * hz);
2201 		(void) cv_timedwait(&qp->wq_halt_cv,
2202 			&qp->wq_queue_lock, tend);
2203 		qp->wq_halt_request--;
2204 		if (qp->wq_thread_count > 0) {
2205 			if ((qp->wq_thread_count == 1) &&
2206 			    (qp->wq_length == 0) && qp->wq_keepone)
2207 				error = EAGAIN;
2208 			else
2209 				error = EBUSY;
2210 		} else {
2211 			ASSERT(qp->wq_length == 0 && qp->wq_head == NULL);
2212 		}
2213 	}
2214 	mutex_exit(&qp->wq_queue_lock);
2215 	return (error);
2216 }
2217 
2218 void
2219 cachefs_addqueue(struct cachefs_req *rp, struct cachefs_workq *qp)
2220 {
2221 	mutex_enter(&qp->wq_queue_lock);
2222 	if (qp->wq_thread_count < cachefs_max_threads) {
2223 		if (qp->wq_thread_count == 0 ||
2224 		    (qp->wq_length >= (qp->wq_thread_count * 2))) {
2225 			(void) thread_create(NULL, 0, cachefs_async_start,
2226 			    qp, 0, &p0, TS_RUN, minclsyspri);
2227 			qp->wq_thread_count++;
2228 		}
2229 	}
2230 	mutex_enter(&rp->cfs_req_lock);
2231 	if (qp->wq_tail)
2232 		qp->wq_tail->cfs_next = rp;
2233 	else
2234 		qp->wq_head = rp;
2235 	qp->wq_tail = rp;
2236 	rp->cfs_next = NULL;
2237 	qp->wq_length++;
2238 	if (qp->wq_length > qp->wq_max_len)
2239 		qp->wq_max_len = qp->wq_length;
2240 	mutex_enter(&cachefs_async_lock);
2241 	++cachefs_async_count;
2242 	mutex_exit(&cachefs_async_lock);
2243 
2244 	cv_signal(&qp->wq_req_cv);
2245 	mutex_exit(&rp->cfs_req_lock);
2246 	mutex_exit(&qp->wq_queue_lock);
2247 }
2248 
2249 void
2250 cachefs_async_putpage(struct cachefs_putpage_req *prp, cred_t *cr)
2251 {
2252 	struct cnode *cp = VTOC(prp->cp_vp);
2253 
2254 	ASSERT(CFS_ISFS_BACKFS_NFSV4(C_TO_FSCACHE(cp)) == 0);
2255 
2256 	(void) VOP_PUTPAGE(prp->cp_vp, prp->cp_off, prp->cp_len,
2257 		prp->cp_flags, cr, NULL);
2258 
2259 	mutex_enter(&cp->c_iomutex);
2260 	if (--cp->c_nio == 0)
2261 		cv_broadcast(&cp->c_iocv);
2262 	if (prp->cp_off == 0 && prp->cp_len == 0 &&
2263 	    (cp->c_ioflags & CIO_PUTPAGES)) {
2264 		cp->c_ioflags &= ~CIO_PUTPAGES;
2265 	}
2266 	mutex_exit(&cp->c_iomutex);
2267 }
2268 
2269 void
2270 cachefs_async_populate(struct cachefs_populate_req *pop, cred_t *cr)
2271 {
2272 	struct cnode *cp = VTOC(pop->cpop_vp);
2273 	struct fscache *fscp = C_TO_FSCACHE(cp);
2274 	struct filegrp *fgp = cp->c_filegrp;
2275 	int error = 0; /* not returned -- used as a place-holder */
2276 	vnode_t *frontvp = NULL, *backvp = NULL;
2277 	int havelock = 0;
2278 	vattr_t va;
2279 
2280 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
2281 
2282 	if (((cp->c_filegrp->fg_flags & CFS_FG_WRITE) == 0) ||
2283 	    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
2284 		mutex_enter(&cp->c_statelock);
2285 		cp->c_flags &= ~CN_ASYNC_POPULATE;
2286 		mutex_exit(&cp->c_statelock);
2287 		return; /* goto out */
2288 	}
2289 
2290 	error = cachefs_cd_access(fscp, 0, 0);
2291 	if (error) {
2292 #ifdef CFSDEBUG
2293 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2294 			printf("async_pop: cd_access: err %d con %d\n",
2295 			    error, fscp->fs_cdconnected);
2296 #endif /* CFSDEBUG */
2297 		mutex_enter(&cp->c_statelock);
2298 		cp->c_flags &= ~CN_ASYNC_POPULATE;
2299 		mutex_exit(&cp->c_statelock);
2300 		return; /* goto out */
2301 	}
2302 
2303 	/*
2304 	 * grab the statelock for some minimal things
2305 	 */
2306 
2307 	rw_enter(&cp->c_rwlock, RW_WRITER);
2308 	mutex_enter(&cp->c_statelock);
2309 	havelock = 1;
2310 
2311 	if ((cp->c_flags & CN_ASYNC_POPULATE) == 0)
2312 		goto out;
2313 
2314 	/* there can be only one */
2315 	ASSERT((cp->c_flags & CN_ASYNC_POP_WORKING) == 0);
2316 	cp->c_flags |= CN_ASYNC_POP_WORKING;
2317 	cp->c_popthrp = curthread;
2318 
2319 	if (cp->c_metadata.md_flags & MD_POPULATED)
2320 		goto out;
2321 
2322 	if (cp->c_flags & CN_NOCACHE) {
2323 #ifdef CFSDEBUG
2324 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2325 			printf("cachefs_async_populate: nocache bit on\n");
2326 #endif /* CFSDEBUG */
2327 		error = EINVAL;
2328 		goto out;
2329 	}
2330 
2331 	if (cp->c_frontvp == NULL) {
2332 		if ((cp->c_metadata.md_flags & MD_FILE) == 0) {
2333 			struct cfs_cid cid = cp->c_id;
2334 
2335 			mutex_exit(&cp->c_statelock);
2336 			havelock = 0;
2337 
2338 			/*
2339 			 * if frontfile doesn't exist, drop the lock
2340 			 * to do some of the file creation stuff.
2341 			 */
2342 
2343 			if (fgp->fg_flags & CFS_FG_ALLOC_ATTR) {
2344 				error = filegrp_allocattr(fgp);
2345 				if (error != 0)
2346 					goto out;
2347 			}
2348 			if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
2349 				mutex_enter(&fgp->fg_mutex);
2350 				if (fgp->fg_flags & CFS_FG_ALLOC_FILE) {
2351 					if (fgp->fg_header->ach_nffs == 0)
2352 						error = filegrpdir_create(fgp);
2353 					else
2354 						error = filegrpdir_find(fgp);
2355 					if (error != 0) {
2356 						mutex_exit(&fgp->fg_mutex);
2357 						goto out;
2358 					}
2359 				}
2360 				mutex_exit(&fgp->fg_mutex);
2361 			}
2362 
2363 			if (fgp->fg_dirvp != NULL) {
2364 				char name[CFS_FRONTFILE_NAME_SIZE];
2365 				struct vattr *attrp;
2366 
2367 				attrp = cachefs_kmem_zalloc(
2368 				    sizeof (struct vattr), KM_SLEEP);
2369 				attrp->va_mode = S_IFREG | 0666;
2370 				attrp->va_uid = 0;
2371 				attrp->va_gid = 0;
2372 				attrp->va_type = VREG;
2373 				attrp->va_size = 0;
2374 				attrp->va_mask =
2375 				    AT_SIZE | AT_TYPE | AT_MODE |
2376 				    AT_UID | AT_GID;
2377 
2378 				make_ascii_name(&cid, name);
2379 
2380 				(void) VOP_CREATE(fgp->fg_dirvp, name, attrp,
2381 				    EXCL, 0666, &frontvp, kcred, 0, NULL, NULL);
2382 
2383 				cachefs_kmem_free(attrp,
2384 				    sizeof (struct vattr));
2385 			}
2386 
2387 			mutex_enter(&cp->c_statelock);
2388 			havelock = 1;
2389 		}
2390 		error = cachefs_getfrontfile(cp);
2391 		ASSERT((error != 0) ||
2392 		    (frontvp == NULL) ||
2393 		    (frontvp == cp->c_frontvp));
2394 	}
2395 	if ((error != 0) || (cp->c_frontvp == NULL))
2396 		goto out;
2397 
2398 	if (frontvp != NULL)
2399 		VN_RELE(frontvp);
2400 
2401 	frontvp = cp->c_frontvp;
2402 	VN_HOLD(frontvp);
2403 
2404 	if (cp->c_backvp == NULL) {
2405 		error = cachefs_getbackvp(fscp, cp);
2406 		if ((error != 0) || (cp->c_backvp == NULL))
2407 			goto out;
2408 	}
2409 	backvp = cp->c_backvp;
2410 	VN_HOLD(backvp);
2411 
2412 	switch (pop->cpop_vp->v_type) {
2413 	case VREG:
2414 		mutex_exit(&cp->c_statelock);
2415 		havelock = 0;
2416 		error = cachefs_async_populate_reg(pop, cr, backvp, frontvp);
2417 		break;
2418 	case VDIR:
2419 		error = cachefs_async_populate_dir(pop, cr, backvp, frontvp);
2420 		mutex_exit(&cp->c_statelock);
2421 		havelock = 0;
2422 		break;
2423 	default:
2424 #ifdef CFSDEBUG
2425 		printf("cachefs_async_populate: warning: vnode type = %d\n",
2426 		    pop->cpop_vp->v_type);
2427 		ASSERT(0);
2428 #endif /* CFSDEBUG */
2429 		error = EINVAL;
2430 		break;
2431 	}
2432 
2433 	if (error != 0)
2434 		goto out;
2435 
2436 	error = VOP_FSYNC(frontvp, FSYNC, cr, NULL);
2437 	if (error != 0) {
2438 #ifdef CFSDEBUG
2439 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2440 			printf("cachefs_async_populate: fsync\n");
2441 #endif /* CFSDEBUG */
2442 		goto out;
2443 	}
2444 
2445 	/* grab the lock and finish up */
2446 	mutex_enter(&cp->c_statelock);
2447 	havelock = 1;
2448 
2449 	/* if went nocache while lock was dropped, get out */
2450 	if ((cp->c_flags & CN_NOCACHE) || (cp->c_frontvp == NULL)) {
2451 		error = EINVAL;
2452 		goto out;
2453 	}
2454 
2455 	va.va_mask = AT_MTIME;
2456 	error = VOP_GETATTR(cp->c_frontvp, &va, 0, cr, NULL);
2457 	if (error) {
2458 #ifdef CFSDEBUG
2459 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2460 			printf("cachefs_async_populate: getattr\n");
2461 #endif /* CFSDEBUG */
2462 		goto out;
2463 	}
2464 	cp->c_metadata.md_timestamp = va.va_mtime;
2465 	cp->c_metadata.md_flags |= MD_POPULATED;
2466 	cp->c_metadata.md_flags &= ~MD_INVALREADDIR;
2467 	cp->c_flags |= CN_UPDATED;
2468 
2469 out:
2470 	if (! havelock)
2471 		mutex_enter(&cp->c_statelock);
2472 
2473 	/* see if an error happened behind our backs */
2474 	if ((error == 0) && (cp->c_flags & CN_NOCACHE)) {
2475 #ifdef CFSDEBUG
2476 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2477 			printf("cachefs_async_populate: "
2478 			    "nocache behind our backs\n");
2479 #endif /* CFSDEBUG */
2480 		error = EINVAL;
2481 	}
2482 
2483 	cp->c_flags &= ~(CN_NEED_FRONT_SYNC | CN_POPULATION_PENDING |
2484 	    CN_ASYNC_POPULATE | CN_ASYNC_POP_WORKING);
2485 	cp->c_popthrp = NULL;
2486 
2487 	if (error != 0)
2488 		cachefs_nocache(cp);
2489 
2490 	/* unblock any threads waiting for populate to finish */
2491 	cv_broadcast(&cp->c_popcv);
2492 	mutex_exit(&cp->c_statelock);
2493 	rw_exit(&cp->c_rwlock);
2494 	cachefs_cd_release(fscp);
2495 
2496 	if (backvp != NULL) {
2497 		VN_RELE(backvp);
2498 	}
2499 	if (frontvp != NULL) {
2500 		VN_RELE(frontvp);
2501 	}
2502 }
2503 
2504 /*
2505  * only to be called from cachefs_async_populate
2506  */
2507 
2508 static int
2509 cachefs_async_populate_reg(struct cachefs_populate_req *pop, cred_t *cr,
2510     vnode_t *backvp, vnode_t *frontvp)
2511 {
2512 	struct cnode *cp = VTOC(pop->cpop_vp);
2513 	int error = 0;
2514 	u_offset_t popoff;
2515 	size_t popsize;
2516 
2517 	cachefs_cluster_allocmap(pop->cpop_off, &popoff,
2518 	    &popsize, pop->cpop_size, cp);
2519 	if (popsize == 0) {
2520 #ifdef CFSDEBUG
2521 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2522 			printf("cachefs_async_populate: popsize == 0\n");
2523 #endif /* CFSDEBUG */
2524 		goto out;
2525 	}
2526 
2527 	error = cachefs_populate(cp, popoff, popsize, frontvp, backvp,
2528 	    cp->c_size, cr);
2529 	if (error != 0) {
2530 #ifdef CFSDEBUG
2531 		CFS_DEBUG(CFSDEBUG_ASYNCPOP)
2532 			printf("cachefs_async_populate: cachefs_populate\n");
2533 #endif /* CFSDEBUG */
2534 		goto out;
2535 	}
2536 
2537 out:
2538 	return (error);
2539 }
2540 
2541 void
2542 cachefs_do_req(struct cachefs_req *rp)
2543 {
2544 	struct cachefscache *cachep;
2545 
2546 	mutex_enter(&rp->cfs_req_lock);
2547 	switch (rp->cfs_cmd) {
2548 	case CFS_INVALID:
2549 		panic("cachefs_do_req: CFS_INVALID operation on queue");
2550 		/*NOTREACHED*/
2551 	case CFS_CACHE_SYNC:
2552 		cachep = rp->cfs_req_u.cu_fs_sync.cf_cachep;
2553 		cachefs_cache_sync(cachep);
2554 		break;
2555 	case CFS_IDLE:
2556 		cachefs_cnode_idle(rp->cfs_req_u.cu_idle.ci_vp, rp->cfs_cr);
2557 		break;
2558 	case CFS_PUTPAGE:
2559 		cachefs_async_putpage(&rp->cfs_req_u.cu_putpage, rp->cfs_cr);
2560 		VN_RELE(rp->cfs_req_u.cu_putpage.cp_vp);
2561 		break;
2562 	case CFS_POPULATE:
2563 		cachefs_async_populate(&rp->cfs_req_u.cu_populate, rp->cfs_cr);
2564 		VN_RELE(rp->cfs_req_u.cu_populate.cpop_vp);
2565 		break;
2566 	case CFS_NOOP:
2567 		break;
2568 	default:
2569 		panic("c_do_req: Invalid CFS async operation");
2570 	}
2571 	crfree(rp->cfs_cr);
2572 	rp->cfs_cmd = CFS_INVALID;
2573 	mutex_exit(&rp->cfs_req_lock);
2574 	kmem_cache_free(cachefs_req_cache, rp);
2575 }
2576 
2577 
2578 
2579 
2580 ssize_t cachefs_mem_usage = 0;
2581 
2582 struct km_wrap {
2583 	size_t kw_size;
2584 	struct km_wrap *kw_other;
2585 };
2586 
2587 kmutex_t cachefs_kmem_lock;
2588 
2589 void *
2590 cachefs_kmem_alloc(size_t size, int flag)
2591 {
2592 #ifdef DEBUG
2593 	caddr_t mp = NULL;
2594 	struct km_wrap *kwp;
2595 	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
2596 
2597 	ASSERT(n >= (size + 8));
2598 	mp = kmem_alloc(n, flag);
2599 	if (mp == NULL) {
2600 		return (NULL);
2601 	}
2602 	/*LINTED alignment okay*/
2603 	kwp = (struct km_wrap *)mp;
2604 	kwp->kw_size = n;
2605 	/*LINTED alignment okay*/
2606 	kwp->kw_other = (struct km_wrap *)(mp + n - sizeof (struct km_wrap));
2607 	kwp = (struct km_wrap *)kwp->kw_other;
2608 	kwp->kw_size = n;
2609 	/*LINTED alignment okay*/
2610 	kwp->kw_other = (struct km_wrap *)mp;
2611 
2612 	mutex_enter(&cachefs_kmem_lock);
2613 	ASSERT(cachefs_mem_usage >= 0);
2614 	cachefs_mem_usage += n;
2615 	mutex_exit(&cachefs_kmem_lock);
2616 
2617 	return (mp + sizeof (struct km_wrap));
2618 #else /* DEBUG */
2619 	return (kmem_alloc(size, flag));
2620 #endif /* DEBUG */
2621 }
2622 
/*
 * Zero-filled counterpart of cachefs_kmem_alloc(): allocate `size'
 * bytes via kmem_zalloc() using `flag'.
 *
 * On DEBUG kernels the allocation is padded with a struct km_wrap at
 * each end (total rounded up to a multiple of 8), the two records are
 * cross-linked and stamped with the padded size, and cachefs_mem_usage
 * is increased by that size.  The returned pointer is the first byte
 * after the leading record.  Non-DEBUG kernels call kmem_zalloc()
 * directly.
 *
 * Returns NULL if the underlying allocation returns NULL.
 */
void *
cachefs_kmem_zalloc(size_t size, int flag)
{
#ifdef DEBUG
	const size_t wrapsz = sizeof (struct km_wrap);
	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
	caddr_t mp;
	struct km_wrap *lead, *trail;

	ASSERT(n >= (size + 8));

	if ((mp = kmem_zalloc(n, flag)) == NULL)
		return (NULL);

	/*LINTED alignment okay*/
	lead = (struct km_wrap *)mp;
	/*LINTED alignment okay*/
	trail = (struct km_wrap *)(mp + n - wrapsz);

	/* leading guard */
	lead->kw_size = n;
	lead->kw_other = trail;

	/* trailing guard */
	trail->kw_size = n;
	trail->kw_other = lead;

	/* account for the padded size */
	mutex_enter(&cachefs_kmem_lock);
	ASSERT(cachefs_mem_usage >= 0);
	cachefs_mem_usage += n;
	mutex_exit(&cachefs_kmem_lock);

	return (mp + wrapsz);
#else /* DEBUG */
	return (kmem_zalloc(size, flag));
#endif /* DEBUG */
}
2656 
/*
 * Free memory obtained from cachefs_kmem_alloc()/cachefs_kmem_zalloc().
 * `size' must be the size that was originally requested.
 *
 * On DEBUG kernels the padded size is recomputed from `size', the
 * km_wrap records in front of and behind the caller's memory are
 * located and checked against it (and against each other), the padded
 * size is subtracted from cachefs_mem_usage, and the records are wiped
 * before the whole allocation is returned to kmem_free().  Non-DEBUG
 * kernels call kmem_free() directly.
 */
void
cachefs_kmem_free(void *mp, size_t size)
{
#ifdef DEBUG
	size_t n = (size + (2 * sizeof (struct km_wrap)) + 7) & ~7;
	struct km_wrap *front_kwp;
	struct km_wrap *back_kwp;
	caddr_t base;

	ASSERT(n >= (size + 8));

	/* step back over the front guard to the real allocation start */
	base = (caddr_t)mp - sizeof (struct km_wrap);
	front_kwp = (struct km_wrap *)base;
	back_kwp = (struct km_wrap *)(base + n - sizeof (struct km_wrap));

	/* both guards must be intact and agree with the caller's size */
	ASSERT(front_kwp->kw_other == back_kwp);
	ASSERT(front_kwp->kw_size == n);
	ASSERT(back_kwp->kw_other == front_kwp);
	ASSERT(back_kwp->kw_size == n);

	mutex_enter(&cachefs_kmem_lock);
	cachefs_mem_usage -= n;
	ASSERT(cachefs_mem_usage >= 0);
	mutex_exit(&cachefs_kmem_lock);

	/* wipe the guards so a second free of this block trips the checks */
	back_kwp->kw_size = 0;
	front_kwp->kw_size = 0;
	back_kwp->kw_other = NULL;
	front_kwp->kw_other = NULL;

	kmem_free(front_kwp, n);
#else /* DEBUG */
	kmem_free(mp, size);
#endif /* DEBUG */
}
2689 
2690 char *
2691 cachefs_strdup(char *s)
2692 {
2693 	char *rc;
2694 
2695 	ASSERT(s != NULL);
2696 
2697 	rc = cachefs_kmem_alloc(strlen(s) + 1, KM_SLEEP);
2698 	(void) strcpy(rc, s);
2699 
2700 	return (rc);
2701 }
2702 
2703 int
2704 cachefs_stats_kstat_snapshot(kstat_t *ksp, void *buf, int rw)
2705 {
2706 	struct fscache *fscp = (struct fscache *)ksp->ks_data;
2707 	cachefscache_t *cachep = fscp->fs_cache;
2708 	int	error = 0;
2709 
2710 	if (rw == KSTAT_WRITE) {
2711 		bcopy(buf, &fscp->fs_stats, sizeof (fscp->fs_stats));
2712 		cachep->c_gc_count = fscp->fs_stats.st_gc_count;
2713 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_time,
2714 			cachep->c_gc_time);
2715 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_before_atime,
2716 			cachep->c_gc_before);
2717 		CACHEFS_CFS_TIME_TO_TIME_COPY(fscp->fs_stats.st_gc_after_atime,
2718 			cachep->c_gc_after);
2719 		return (error);
2720 	}
2721 
2722 	fscp->fs_stats.st_gc_count = cachep->c_gc_count;
2723 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_time,
2724 			fscp->fs_stats.st_gc_time, error);
2725 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_before,
2726 			fscp->fs_stats.st_gc_before_atime, error);
2727 	CACHEFS_TIME_TO_CFS_TIME_COPY(cachep->c_gc_after,
2728 			fscp->fs_stats.st_gc_after_atime, error);
2729 	bcopy(&fscp->fs_stats, buf, sizeof (fscp->fs_stats));
2730 
2731 	return (error);
2732 }
2733 
2734 #ifdef DEBUG
2735 cachefs_debug_info_t *
2736 cachefs_debug_save(cachefs_debug_info_t *oldcdb, int chain,
2737     char *message, uint_t flags, int number, void *pointer,
2738     cachefscache_t *cachep, struct fscache *fscp, struct cnode *cp)
2739 {
2740 	cachefs_debug_info_t *cdb;
2741 
2742 	if ((chain) || (oldcdb == NULL))
2743 		cdb = cachefs_kmem_zalloc(sizeof (*cdb), KM_SLEEP);
2744 	else
2745 		cdb = oldcdb;
2746 	if (chain)
2747 		cdb->cdb_next = oldcdb;
2748 
2749 	if (message != NULL) {
2750 		if (cdb->cdb_message != NULL)
2751 			cachefs_kmem_free(cdb->cdb_message,
2752 			    strlen(cdb->cdb_message) + 1);
2753 		cdb->cdb_message = cachefs_kmem_alloc(strlen(message) + 1,
2754 		    KM_SLEEP);
2755 		(void) strcpy(cdb->cdb_message, message);
2756 	}
2757 	cdb->cdb_flags = flags;
2758 	cdb->cdb_int = number;
2759 	cdb->cdb_pointer = pointer;
2760 
2761 	cdb->cdb_count++;
2762 
2763 	cdb->cdb_cnode = cp;
2764 	if (cp != NULL) {
2765 		cdb->cdb_frontvp = cp->c_frontvp;
2766 		cdb->cdb_backvp = cp->c_backvp;
2767 	}
2768 	if (fscp != NULL)
2769 		cdb->cdb_fscp = fscp;
2770 	else if (cp != NULL)
2771 		cdb->cdb_fscp = C_TO_FSCACHE(cp);
2772 	if (cachep != NULL)
2773 		cdb->cdb_cachep = cachep;
2774 	else if (cdb->cdb_fscp != NULL)
2775 		cdb->cdb_cachep = cdb->cdb_fscp->fs_cache;
2776 
2777 	cdb->cdb_thread = curthread;
2778 	cdb->cdb_timestamp = gethrtime();
2779 	cdb->cdb_depth = getpcstack(cdb->cdb_stack, CACHEFS_DEBUG_DEPTH);
2780 
2781 	return (cdb);
2782 }
2783 
2784 void
2785 cachefs_debug_show(cachefs_debug_info_t *cdb)
2786 {
2787 	hrtime_t now = gethrtime();
2788 	timestruc_t ts;
2789 	int i;
2790 
2791 	while (cdb != NULL) {
2792 		hrt2ts(now - cdb->cdb_timestamp, &ts);
2793 		printf("cdb: %p count: %d timelapse: %ld.%9ld\n",
2794 		    (void *)cdb, cdb->cdb_count, ts.tv_sec, ts.tv_nsec);
2795 		if (cdb->cdb_message != NULL)
2796 			printf("message: %s", cdb->cdb_message);
2797 		printf("flags: %x int: %d pointer: %p\n",
2798 		    cdb->cdb_flags, cdb->cdb_int, (void *)cdb->cdb_pointer);
2799 
2800 		printf("cnode: %p fscp: %p cachep: %p\n",
2801 		    (void *)cdb->cdb_cnode,
2802 		    (void *)cdb->cdb_fscp, (void *)cdb->cdb_cachep);
2803 		printf("frontvp: %p backvp: %p\n",
2804 		    (void *)cdb->cdb_frontvp, (void *)cdb->cdb_backvp);
2805 
2806 		printf("thread: %p stack...\n", (void *)cdb->cdb_thread);
2807 		for (i = 0; i < cdb->cdb_depth; i++) {
2808 			ulong_t off;
2809 			char *sym;
2810 
2811 			sym = kobj_getsymname(cdb->cdb_stack[i], &off);
2812 			printf("%s+%lx\n", sym ? sym : "?", off);
2813 		}
2814 		delay(2*hz);
2815 		cdb = cdb->cdb_next;
2816 	}
2817 	debug_enter(NULL);
2818 }
2819 #endif /* DEBUG */
2820 
2821 /*
2822  * Changes the size of the front file.
2823  * Returns 0 for success or error if cannot set file size.
2824  * NOCACHE bit is ignored.
2825  * c_size is ignored.
2826  * statelock must be held, frontvp must be set.
2827  * File must be populated if setting to a size other than zero.
2828  */
2829 int
2830 cachefs_frontfile_size(cnode_t *cp, u_offset_t length)
2831 {
2832 	cachefscache_t *cachep = C_TO_FSCACHE(cp)->fs_cache;
2833 	vattr_t va;
2834 	size_t nblks, blkdelta;
2835 	int error = 0;
2836 	int alloc = 0;
2837 	struct cachefs_allocmap *allocp;
2838 
2839 	ASSERT(MUTEX_HELD(&cp->c_statelock));
2840 	ASSERT(cp->c_frontvp);
2841 
2842 	/* if growing the file, allocate space first, we charge for holes */
2843 	if (length) {
2844 		ASSERT(cp->c_metadata.md_flags & MD_POPULATED);
2845 
2846 		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
2847 		if (nblks > cp->c_metadata.md_frontblks) {
2848 			blkdelta = nblks - cp->c_metadata.md_frontblks;
2849 			error = cachefs_allocblocks(cachep, blkdelta,
2850 			    cp->c_metadata.md_rltype);
2851 			if (error)
2852 				goto out;
2853 			alloc = 1;
2854 		}
2855 	}
2856 
2857 	/* change the size of the front file */
2858 	va.va_mask = AT_SIZE;
2859 	va.va_size = length;
2860 	error = VOP_SETATTR(cp->c_frontvp, &va, 0, kcred, NULL);
2861 	if (error)
2862 		goto out;
2863 
2864 	/* zero out the alloc map */
2865 	bzero(&cp->c_metadata.md_allocinfo,
2866 	    cp->c_metadata.md_allocents * sizeof (struct cachefs_allocmap));
2867 	cp->c_metadata.md_allocents = 0;
2868 
2869 	if (length == 0) {
2870 		/* free up blocks */
2871 		if (cp->c_metadata.md_frontblks) {
2872 			cachefs_freeblocks(cachep, cp->c_metadata.md_frontblks,
2873 			    cp->c_metadata.md_rltype);
2874 			cp->c_metadata.md_frontblks = 0;
2875 		}
2876 	} else {
2877 		/* update number of blocks if shrinking file */
2878 		nblks = (length + MAXBSIZE - 1) / MAXBSIZE;
2879 		if (nblks < cp->c_metadata.md_frontblks) {
2880 			blkdelta = cp->c_metadata.md_frontblks - nblks;
2881 			cachefs_freeblocks(cachep, blkdelta,
2882 			    cp->c_metadata.md_rltype);
2883 			cp->c_metadata.md_frontblks = (uint_t)nblks;
2884 		}
2885 
2886 		/* fix up alloc map to reflect new size */
2887 		allocp = cp->c_metadata.md_allocinfo;
2888 		allocp->am_start_off = 0;
2889 		allocp->am_size = length;
2890 		cp->c_metadata.md_allocents = 1;
2891 	}
2892 	cp->c_flags |= CN_UPDATED | CN_NEED_FRONT_SYNC;
2893 
2894 out:
2895 	if (error && alloc)
2896 		cachefs_freeblocks(cachep, blkdelta, cp->c_metadata.md_rltype);
2897 	return (error);
2898 }
2899 
2900 /*ARGSUSED*/
2901 int
2902 cachefs_req_create(void *voidp, void *cdrarg, int kmflags)
2903 {
2904 	struct cachefs_req *rp = (struct cachefs_req *)voidp;
2905 
2906 	/*
2907 	 * XXX don't do this!  if you need this, you can't use this
2908 	 * constructor.
2909 	 */
2910 
2911 	bzero(rp, sizeof (struct cachefs_req));
2912 
2913 	mutex_init(&rp->cfs_req_lock, NULL, MUTEX_DEFAULT, NULL);
2914 	return (0);
2915 }
2916 
2917 /*ARGSUSED*/
2918 void
2919 cachefs_req_destroy(void *voidp, void *cdrarg)
2920 {
2921 	struct cachefs_req *rp = (struct cachefs_req *)voidp;
2922 
2923 	mutex_destroy(&rp->cfs_req_lock);
2924 }
2925