xref: /titanic_41/usr/src/uts/common/fs/cachefs/cachefs_fscache.c (revision 5e989a96186a37eb528fb7bb4d28a150874ec799)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 #pragma ident	"%Z%%M%	%I%	%E% SMI"
26 
27 #include <sys/param.h>
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/file.h>
31 #include <sys/cred.h>
32 #include <sys/proc.h>
33 #include <sys/user.h>
34 #include <sys/vfs.h>
35 #include <sys/vnode.h>
36 #include <sys/pathname.h>
37 #include <sys/uio.h>
38 #include <sys/tiuser.h>
39 #include <sys/sysmacros.h>
40 #include <sys/kmem.h>
41 #include <sys/mount.h>
42 #include <sys/ioctl.h>
43 #include <sys/statvfs.h>
44 #include <sys/errno.h>
45 #include <sys/debug.h>
46 #include <sys/cmn_err.h>
47 #include <sys/utsname.h>
48 #include <sys/modctl.h>
49 #include <sys/stat.h>
50 #include <sys/fcntl.h>
51 #include <sys/fbuf.h>
52 #include <rpc/types.h>
53 
54 #include <vm/hat.h>
55 #include <vm/as.h>
56 #include <vm/page.h>
57 #include <vm/pvn.h>
58 #include <vm/seg.h>
59 #include <vm/seg_map.h>
60 #include <vm/seg_vn.h>
61 #include <vm/rm.h>
62 #include <sys/fs/cachefs_fs.h>
63 #include <sys/fs/cachefs_dlog.h>
64 #include <sys/fs/cachefs_ioctl.h>
65 
/*
 * external references
 *
 * Consistency operation vectors.  fscache_create() and fscache_mounted()
 * point fs_cfsops at one of these.
 */
extern struct cachefsops nopcfsops, strictcfsops, codcfsops;

/* forward references */
int fscdir_create(cachefscache_t *cachep, char *namep, fscache_t *fscp);
int fscdir_find(cachefscache_t *cachep, ino64_t fsid, fscache_t *fscp);
static int fscache_info_sync(fscache_t *fscp);

/*
 * kmem cache that fscache_create() allocates fscache objects from and
 * that fscache_destroy() returns them to.
 * NOTE(review): the cache itself appears to be set up outside the code
 * shown here -- confirm where it is created.
 */
struct kmem_cache *cachefs_fscache_cache = NULL;
75 
76 /*
77  * ------------------------------------------------------------------
78  *
79  *		fscache_create
80  *
81  * Description:
82  *	Creates a fscache object.
83  * Arguments:
84  *	cachep		cache to create fscache object for
85  * Returns:
86  *	Returns a fscache object.
87  * Preconditions:
88  *	precond(cachep)
89  */
90 
91 fscache_t *
92 fscache_create(cachefscache_t *cachep)
93 {
94 	fscache_t *fscp;
95 
96 	/* create and initialize the fscache object */
97 	fscp = kmem_cache_alloc(cachefs_fscache_cache, KM_SLEEP);
98 
99 	bzero(fscp, sizeof (*fscp));
100 
101 	mutex_init(&fscp->fs_fslock, NULL, MUTEX_DEFAULT, NULL);
102 	mutex_init(&fscp->fs_idlelock, NULL, MUTEX_DEFAULT, NULL);
103 	mutex_init(&fscp->fs_dlock, NULL, MUTEX_DEFAULT, NULL);
104 	mutex_init(&fscp->fs_cdlock, NULL, MUTEX_DEFAULT, NULL);
105 	cv_init(&fscp->fs_cdwaitcv, NULL, CV_DEFAULT, NULL);
106 
107 	fscp->fs_cache = cachep;
108 	fscp->fs_info.fi_mntflags = CFS_WRITE_AROUND;
109 	fscp->fs_info.fi_popsize = DEF_POP_SIZE;
110 	fscp->fs_info.fi_fgsize = DEF_FILEGRP_SIZE;
111 	fscp->fs_cfsops = &nopcfsops;
112 	fscp->fs_consttype = CFS_FS_CONST_NOCONST;
113 	fscp->fs_acregmin = 30;
114 	fscp->fs_acregmax = 30;
115 	fscp->fs_acdirmin = 30;
116 	fscp->fs_acdirmax = 30;
117 	fscp->fs_cdconnected = CFS_CD_CONNECTED;
118 	fscp->fs_mntpt = NULL;
119 	fscp->fs_hostname = NULL;
120 	fscp->fs_backfsname = NULL;
121 	cachefs_workq_init(&fscp->fs_workq);
122 	return (fscp);
123 }
124 
125 /*
126  * ------------------------------------------------------------------
127  *
128  *		fscache_destroy
129  *
130  * Description:
131  *	Destroys the fscache object.
132  * Arguments:
133  *	fscp	the fscache object to destroy
134  * Returns:
135  * Preconditions:
136  *	precond(fscp)
137  *	precond(fs_ref == 0)
138  */
139 
140 void
141 fscache_destroy(fscache_t *fscp)
142 {
143 	size_t strl;
144 
145 	ASSERT(fscp->fs_ref == 0);
146 
147 	(void) fscache_info_sync(fscp);
148 
149 	if (fscp->fs_mntpt) {
150 		strl = strlen(fscp->fs_mntpt);
151 		if (strl != 0)
152 			kmem_free(fscp->fs_mntpt, strl + 1);
153 	}
154 	if (fscp->fs_hostname) {
155 		strl = strlen(fscp->fs_hostname);
156 		if (strl != 0)
157 			kmem_free(fscp->fs_hostname, strl + 1);
158 	}
159 	if (fscp->fs_backfsname) {
160 		strl = strlen(fscp->fs_backfsname);
161 		if (strl != 0)
162 			kmem_free(fscp->fs_backfsname, strl + 1);
163 	}
164 
165 	/* drop the inum translation table */
166 	if (fscp->fs_inum_size > 0)
167 		cachefs_kmem_free(fscp->fs_inum_trans,
168 		    fscp->fs_inum_size * sizeof (cachefs_inum_trans_t));
169 
170 	/* drop references to the fscache directory */
171 	if (fscp->fs_fscdirvp)
172 		VN_RELE(fscp->fs_fscdirvp);
173 	if (fscp->fs_fsattrdir)
174 		VN_RELE(fscp->fs_fsattrdir);
175 	if (fscp->fs_infovp)
176 		VN_RELE(fscp->fs_infovp);
177 
178 	/* drop logging references */
179 	cachefs_dlog_teardown(fscp);
180 
181 	mutex_destroy(&fscp->fs_fslock);
182 	mutex_destroy(&fscp->fs_idlelock);
183 	mutex_destroy(&fscp->fs_dlock);
184 	mutex_destroy(&fscp->fs_cdlock);
185 	cv_destroy(&fscp->fs_cdwaitcv);
186 
187 	kmem_cache_free(cachefs_fscache_cache, fscp);
188 }
189 
190 /*
191  * ------------------------------------------------------------------
192  *
193  *		fscache_setup
194  *
195  * Description:
196  *	Activates a fscache by associating the fscache object
197  *	with on disk data.
198  *	If the fscache directory of the specified fsid exists then
199  *	it will be used.
200  *	Otherwise a new fscache directory will be created using namep
201  *	and optp with fsid being ignored.  However if namep or optp
202  *	are not NULL or the cache is in NOFILL then this routine fails.
203  * Arguments:
204  *	fscp	the fscache object to activate
205  *	fsid	unique identifier for the cache
206  *	namep	name of the cache
207  *	optp	options for the cache
208  * Returns:
209  *	Returns 0 for success, !0 on failure.
210  * Preconditions:
211  *	precond(fscp)
212  *	precond(the cache must not be in NOCACHE mode)
213  *	precond(the cache must not alread by active)
214  */
215 
216 static int
217 fscache_setup(fscache_t *fscp, ino64_t fsid, char *namep,
218     struct cachefsoptions *optp, ino64_t backfileno, int setflags)
219 {
220 	int error;
221 	cachefscache_t *cachep = fscp->fs_cache;
222 
223 	ASSERT((cachep->c_flags & CACHE_NOCACHE) == 0);
224 
225 	/* see if the fscache directory already exists */
226 	error =	fscdir_find(cachep, fsid, fscp);
227 	if (error) {
228 		/* return error if cannot create the directory */
229 		if ((namep == NULL) || (optp == NULL) ||
230 		    (cachep->c_flags & CACHE_NOFILL)) {
231 			return (error);
232 		}
233 		if (backfileno == 0)
234 			return (EAGAIN);
235 
236 		/* remember the root back fileno for disconnected mounts */
237 		fscp->fs_info.fi_root = backfileno;
238 
239 		/* copy options into the fscache */
240 		fscp->fs_info.fi_mntflags = optp->opt_flags;
241 		fscp->fs_info.fi_popsize = optp->opt_popsize;
242 		fscp->fs_info.fi_fgsize = optp->opt_fgsize;
243 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
244 
245 		/* create the directory */
246 		error = fscdir_create(cachep, namep, fscp);
247 		if (error) {
248 			if (error == ENOSPC)
249 				cmn_err(CE_WARN,
250 				    "CacheFS: not enough space to create %s",
251 				    namep);
252 			else
253 				cmn_err(CE_WARN,
254 				    "CacheFS: error %d creating %s",
255 				    error, namep);
256 			return (error);
257 		}
258 	} else if (optp) {
259 		/* compare the options to make sure they are compatible */
260 		error = fscache_compare_options(fscp, optp);
261 		if (error) {
262 			cmn_err(CE_WARN,
263 				"CacheFS: mount failed, options do not match.");
264 			return (error);
265 		}
266 
267 		/* copy options into the fscache */
268 		fscp->fs_info.fi_mntflags = optp->opt_flags;
269 		fscp->fs_info.fi_popsize = optp->opt_popsize;
270 		fscp->fs_info.fi_fgsize = optp->opt_fgsize;
271 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
272 
273 		/*
274 		 * The fileid of the root of the filesystem can change
275 		 * in NFSv4, so make sure we update the fi_root
276 		 * with the new filenumber.
277 		 */
278 		if (CFS_ISFS_BACKFS_NFSV4(fscp) &&
279 		    fscp->fs_info.fi_root != backfileno) {
280 			fscp->fs_info.fi_root = backfileno;
281 		}
282 	}
283 
284 	if (setflags) {
285 		mutex_enter(&fscp->fs_fslock);
286 		fscp->fs_flags |= CFS_FS_READ;
287 		if ((cachep->c_flags & CACHE_NOFILL) == 0)
288 			fscp->fs_flags |= CFS_FS_WRITE;
289 		mutex_exit(&fscp->fs_fslock);
290 	}
291 
292 	return (0);
293 }
294 
295 /*
296  * ------------------------------------------------------------------
297  *
298  *		fscache_activate
299  *
300  * Description:
301  *	A wrapper routine for fscache_setup, telling it to setup the
302  *	fscache for general use.
303  *
304  */
305 int
306 fscache_activate(fscache_t *fscp, ino64_t fsid, char *namep,
307     struct cachefsoptions *optp, ino64_t backfileno)
308 {
309 	return (fscache_setup(fscp, fsid, namep, optp, backfileno, 1));
310 }
311 
312 /*
313  * ------------------------------------------------------------------
314  *
315  *		fscache_enable
316  *
317  * Description:
318  *	A wrapper routine for fscache_setup, telling it to create a
319  *	fscache that can be used during remount.  In this case the
320  *	fscache flags that allow general use are not yet turned on.
321  *	A later call to fscache_activate_rw will set the flags.
322  *
323  */
324 int
325 fscache_enable(fscache_t *fscp, ino64_t fsid, char *namep,
326     struct cachefsoptions *optp, ino64_t backfileno)
327 {
328 	return (fscache_setup(fscp, fsid, namep, optp, backfileno, 0));
329 }
330 
331 /*
332  * ------------------------------------------------------------------
333  *
334  *		fscache_activate_rw
335  *
336  * Description:
337  *	Makes the fscache both readable and writable.
338  * Arguments:
339  *	fscp		fscache object
340  * Returns:
341  * Preconditions:
342  *	precond(fscp)
343  */
344 
345 void
346 fscache_activate_rw(fscache_t *fscp)
347 {
348 	mutex_enter(&fscp->fs_fslock);
349 	fscp->fs_flags |= (CFS_FS_WRITE|CFS_FS_READ);
350 	mutex_exit(&fscp->fs_fslock);
351 }
352 
353 /*
354  * ------------------------------------------------------------------
355  *
356  *		fscache_hold
357  *
358  * Description:
359  *	Increments the reference count on the fscache object
360  * Arguments:
361  *	fscp		fscache object to incriment reference count on
362  * Returns:
363  * Preconditions:
364  *	precond(fscp)
365  */
366 
367 void
368 fscache_hold(fscache_t *fscp)
369 {
370 	mutex_enter(&fscp->fs_fslock);
371 	fscp->fs_ref++;
372 	ASSERT(fscp->fs_ref > 0);
373 	mutex_exit(&fscp->fs_fslock);
374 }
375 
376 /*
377  * ------------------------------------------------------------------
378  *
379  *		fscache_rele
380  *
381  * Description:
382  *	Decriments the reference count on the fscache object
383  * Arguments:
384  *	fscp		fscache object to decriment reference count on
385  * Returns:
386  * Preconditions:
387  *	precond(fscp)
388  */
389 
390 void
391 fscache_rele(fscache_t *fscp)
392 {
393 	mutex_enter(&fscp->fs_fslock);
394 	ASSERT(fscp->fs_ref > 0);
395 	fscp->fs_ref--;
396 	mutex_exit(&fscp->fs_fslock);
397 }
398 
399 /*
400  * ------------------------------------------------------------------
401  *
402  *		fscache_cnodecnt
403  *
404  * Description:
405  *	Changes the count of number of cnodes on this fscache
406  *	by the specified amount.
407  * Arguments:
408  *	fscp		fscache object to to modify count on
409  *	cnt		amount to adjust by
410  * Returns:
411  *	Returns new count of number of cnodes.
412  * Preconditions:
413  *	precond(fscp)
414  */
415 
416 int
417 fscache_cnodecnt(fscache_t *fscp, int cnt)
418 {
419 	int xx;
420 
421 	mutex_enter(&fscp->fs_fslock);
422 	fscp->fs_cnodecnt += cnt;
423 	ASSERT(fscp->fs_cnodecnt >= 0);
424 	xx = fscp->fs_cnodecnt;
425 	mutex_exit(&fscp->fs_fslock);
426 	return (xx);
427 }
428 
429 /*
430  * ------------------------------------------------------------------
431  *
432  *		fscache_mounted
433  *
434  * Description:
435  *	Called to indicate the the fscache is mounted.
436  * Arguments:
437  *	fscp		fscache object
438  *	cfsvfsp		cachefs vfsp
439  *	backvfsp	vfsp of back file system
440  * Returns:
441  *	Returns 0 for success, -1 if the cache is already mounted.
442  * Preconditions:
443  *	precond(fscp)
444  */
445 
446 int
447 fscache_mounted(fscache_t *fscp, struct vfs *cfsvfsp, struct vfs *backvfsp)
448 {
449 	int error = 0;
450 
451 	mutex_enter(&fscp->fs_fslock);
452 	if (fscp->fs_flags & CFS_FS_MOUNTED) {
453 		error = -1;
454 		goto out;
455 	}
456 
457 	fscp->fs_backvfsp = backvfsp;
458 	fscp->fs_cfsvfsp = cfsvfsp;
459 	gethrestime(&fscp->fs_cod_time);
460 	fscp->fs_flags |= CFS_FS_MOUNTED;
461 
462 	if (CFS_ISFS_SNR(fscp)) {
463 		/*
464 		 * If there is a dlog file present, then we assume the cache
465 		 * was left in disconnected mode.
466 		 * Also if the back file system was not mounted we also
467 		 * start off in disconnected mode.
468 		 */
469 		error = cachefs_dlog_setup(fscp, 0);
470 		if (!error || (backvfsp == NULL)) {
471 			mutex_enter(&fscp->fs_cdlock);
472 			fscp->fs_cdconnected = CFS_CD_DISCONNECTED;
473 			fscp->fs_cdtransition = 0;
474 			cv_broadcast(&fscp->fs_cdwaitcv);
475 			mutex_exit(&fscp->fs_cdlock);
476 		}
477 
478 		/* invalidate any local fileno mappings */
479 		fscp->fs_info.fi_resetfileno++;
480 		fscp->fs_flags |= CFS_FS_DIRTYINFO;
481 
482 		/* if connected, invalidate any local time mappings */
483 		if (backvfsp)
484 			fscp->fs_info.fi_resettimes++;
485 	}
486 
487 		error = 0;
488 
489 	/* set up the consistency mode */
490 	if (fscp->fs_info.fi_mntflags & CFS_NOCONST_MODE) {
491 		fscp->fs_cfsops = &nopcfsops;
492 		fscp->fs_consttype = CFS_FS_CONST_NOCONST;
493 	} else if (fscp->fs_info.fi_mntflags & CFS_CODCONST_MODE) {
494 		fscp->fs_cfsops = &codcfsops;
495 		fscp->fs_consttype = CFS_FS_CONST_CODCONST;
496 	} else {
497 		fscp->fs_cfsops = &strictcfsops;
498 		fscp->fs_consttype = CFS_FS_CONST_STRICT;
499 	}
500 
501 out:
502 	mutex_exit(&fscp->fs_fslock);
503 	(void) fscache_info_sync(fscp);
504 	return (error);
505 }
506 
507 /*
508  * Compares fscache state with new mount options
509  * to make sure compatible.
510  * Returns ESRCH if not compatible or 0 for success.
511  */
512 int
513 fscache_compare_options(fscache_t *fscp, struct cachefsoptions *optp)
514 {
515 	if ((fscp->fs_info.fi_popsize == optp->opt_popsize) &&
516 	    (fscp->fs_info.fi_fgsize == optp->opt_fgsize)) {
517 		return (0);
518 	} else {
519 		return (ESRCH);
520 	}
521 }
522 
523 /*
524  * ------------------------------------------------------------------
525  *
526  *		fscache_sync
527  *
528  * Description:
529  *	Syncs any data for this fscache to the front file system.
530  * Arguments:
531  *	fscp	fscache to sync
532  * Returns:
533  * Preconditions:
534  *	precond(fscp)
535  */
536 
537 void
538 fscache_sync(struct fscache *fscp)
539 {
540 	struct filegrp *fgp;
541 	int xx;
542 
543 	(void) fscache_info_sync(fscp);
544 
545 	/* sync the cnodes */
546 	cachefs_cnode_traverse(fscp, cachefs_cnode_sync);
547 
548 	mutex_enter(&fscp->fs_fslock);
549 
550 	/* sync the attrcache files */
551 	for (xx = 0; xx < CFS_FS_FGP_BUCKET_SIZE; xx++) {
552 		for (fgp = fscp->fs_filegrp[xx]; fgp != NULL;
553 			fgp = fgp->fg_next) {
554 			(void) filegrp_sync(fgp);
555 		}
556 	}
557 
558 	/* garbage collect any unused file groups */
559 	filegrp_list_gc(fscp);
560 
561 	mutex_exit(&fscp->fs_fslock);
562 }
563 
564 /*
565  * ------------------------------------------------------------------
566  *
567  *		fscache_acset
568  *
569  * Description:
570  *	Sets the ac timeout values for the fscache.
571  * Arguments:
572  *	fscp	fscache object
573  * Returns:
574  * Preconditions:
575  *	precond(fscp)
576  */
577 
578 void
579 fscache_acset(fscache_t *fscp,
580 	uint_t acregmin, uint_t acregmax, uint_t acdirmin, uint_t acdirmax)
581 {
582 	mutex_enter(&fscp->fs_fslock);
583 	if (acregmin > acregmax)
584 		acregmin = acregmax;
585 	if (acdirmin > acdirmax)
586 		acdirmin = acdirmax;
587 	if (acregmin != 0)
588 		fscp->fs_acregmin = acregmin;
589 	if (acregmax != 0)
590 		fscp->fs_acregmax = acregmax;
591 	if (acdirmin != 0)
592 		fscp->fs_acdirmin = acdirmin;
593 	if (acdirmax != 0)
594 		fscp->fs_acdirmax = acdirmax;
595 	mutex_exit(&fscp->fs_fslock);
596 }
597 
598 /*
599  * ------------------------------------------------------------------
600  *
601  *		fscache_list_find
602  *
603  * Description:
604  *	Finds the desired fscache structure on a cache's
605  *	file system list.
606  * Arguments:
607  *	cachep	holds the list of fscache objects to search
608  *	fsid	the numeric identifier of the fscache
609  * Returns:
610  *	Returns an fscache object on success or NULL on failure.
611  * Preconditions:
612  *	precond(cachep)
613  *	precond(the fslistlock must be held)
614  */
615 
616 fscache_t *
617 fscache_list_find(cachefscache_t *cachep, ino64_t fsid)
618 {
619 	fscache_t *fscp = cachep->c_fslist;
620 
621 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
622 
623 	while (fscp != NULL) {
624 		if (fscp->fs_cfsid == fsid) {
625 			ASSERT(fscp->fs_cache == cachep);
626 			break;
627 		}
628 		fscp = fscp->fs_next;
629 	}
630 
631 	return (fscp);
632 }
633 
634 /*
635  * ------------------------------------------------------------------
636  *
637  *		fscache_list_add
638  *
639  * Description:
640  *	Adds the specified fscache object to the list on
641  *	the specified cachep.
642  * Arguments:
643  *	cachep	holds the list of fscache objects
644  *	fscp	fscache object to add to list
645  * Returns:
646  * Preconditions:
647  *	precond(cachep)
648  *	precond(fscp)
649  *	precond(fscp cannot already be on a list)
650  *	precond(the fslistlock must be held)
651  */
652 
653 void
654 fscache_list_add(cachefscache_t *cachep, fscache_t *fscp)
655 {
656 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
657 
658 	fscp->fs_next = cachep->c_fslist;
659 	cachep->c_fslist = fscp;
660 	cachep->c_refcnt++;
661 }
662 
663 /*
664  * ------------------------------------------------------------------
665  *
666  *		fscache_list_remove
667  *
668  * Description:
669  *	Removes the specified fscache object from the list
670  *	on the specified cachep.
671  * Arguments:
672  *	cachep	holds the list of fscache objects
673  *	fscp	fscache object to remove from list
674  * Returns:
675  * Preconditions:
676  *	precond(cachep)
677  *	precond(fscp)
678  *	precond(the fslistlock must be held)
679  */
680 
681 void
682 fscache_list_remove(cachefscache_t *cachep, fscache_t *fscp)
683 {
684 	struct fscache **pfscp = &cachep->c_fslist;
685 
686 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
687 
688 	while (*pfscp != NULL) {
689 		if (fscp == *pfscp) {
690 			*pfscp = fscp->fs_next;
691 			cachep->c_refcnt--;
692 			break;
693 		}
694 		pfscp = &(*pfscp)->fs_next;
695 	}
696 }
697 
698 /*
699  * ------------------------------------------------------------------
700  *
701  *		fscache_list_gc
702  *
703  * Description:
704  *	Traverses the list of fscache objects on the cachep
705  *	list and destroys any that are not mounted and
706  *	that are not referenced.
707  * Arguments:
708  *	cachep	holds the list of fscache objects
709  * Returns:
710  * Preconditions:
711  *	precond(cachep)
712  *	precond(the fslistlock must be held)
713  */
714 
715 void
716 fscache_list_gc(cachefscache_t *cachep)
717 {
718 	struct fscache *next, *fscp;
719 
720 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
721 
722 	for (fscp = cachep->c_fslist; fscp != NULL; fscp = next) {
723 		next = fscp->fs_next;
724 		mutex_enter(&fscp->fs_fslock);
725 		if (((fscp->fs_flags & CFS_FS_MOUNTED) == 0) &&
726 		    (fscp->fs_ref == 0)) {
727 			mutex_exit(&fscp->fs_fslock);
728 			fscache_list_remove(cachep, fscp);
729 			fscache_destroy(fscp);
730 		} else {
731 			mutex_exit(&fscp->fs_fslock);
732 		}
733 	}
734 }
735 
736 /*
737  * ------------------------------------------------------------------
738  *
739  *		fscache_list_mounted
740  *
741  * Description:
742  *	Returns the number of fscache objects that are mounted.
743  * Arguments:
744  *	cachep	holds the list of fscache objects
745  * Returns:
746  * Preconditions:
747  *	precond(cachep)
748  *	precond(the fslistlock must be held)
749  */
750 
751 int
752 fscache_list_mounted(cachefscache_t *cachep)
753 {
754 	struct fscache *fscp;
755 	int count;
756 
757 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
758 
759 	count = 0;
760 	for (fscp = cachep->c_fslist; fscp != NULL; fscp = fscp->fs_next) {
761 		mutex_enter(&fscp->fs_fslock);
762 		if (fscp->fs_flags & CFS_FS_MOUNTED)
763 			count++;
764 		mutex_exit(&fscp->fs_fslock);
765 	}
766 
767 	return (count);
768 }
769 
770 /*
771  * Creates the fs cache directory.
772  * The directory name is the ascii version of the fsid.
773  * Also makes a symlink to the directory using the specified name.
774  */
775 int
776 fscdir_create(cachefscache_t *cachep, char *namep, fscache_t *fscp)
777 {
778 	int error;
779 	vnode_t *fscdirvp = NULL;
780 	vnode_t *infovp = NULL;
781 	vnode_t *attrvp = NULL;
782 	struct vattr *attrp = (struct vattr *)NULL;
783 	char name[CFS_FRONTFILE_NAME_SIZE];
784 	int files;
785 	int blocks = 0;
786 	cfs_cid_t cid;
787 	ino64_t fsid;
788 
789 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
790 	ASSERT(fscp->fs_infovp == NULL);
791 	ASSERT(fscp->fs_fscdirvp == NULL);
792 	ASSERT(fscp->fs_fsattrdir == NULL);
793 
794 	/* directory, symlink and options file + attrcache dir */
795 	files = 0;
796 	while (files < 4) {
797 		error = cachefs_allocfile(cachep);
798 		if (error)
799 			goto out;
800 		files++;
801 	}
802 	error = cachefs_allocblocks(cachep, 4, CACHEFS_RL_NONE);
803 	if (error)
804 		goto out;
805 	blocks = 4;
806 
807 	attrp = cachefs_kmem_alloc(sizeof (struct vattr), KM_SLEEP);
808 	attrp->va_mode = S_IFDIR | 0777;
809 	attrp->va_uid = 0;
810 	attrp->va_gid = 0;
811 	attrp->va_type = VDIR;
812 	attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
813 	error = VOP_MKDIR(cachep->c_dirvp, namep, attrp, &fscdirvp, kcred,
814 	    NULL, 0, NULL);
815 	if (error) {
816 		cmn_err(CE_WARN, "Can't create fs cache directory");
817 		goto out;
818 	}
819 
820 	/*
821 	 * Created the directory. Get the fileno. That'll be the cachefs_fsid.
822 	 */
823 	attrp->va_mask = AT_NODEID;
824 	error = VOP_GETATTR(fscdirvp, attrp, 0, kcred, NULL);
825 	if (error) {
826 		goto out;
827 	}
828 	fsid = attrp->va_nodeid;
829 	attrp->va_mode = S_IFREG | 0666;
830 	attrp->va_uid = 0;
831 	attrp->va_gid = 0;
832 	attrp->va_type = VREG;
833 	attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
834 	error = VOP_CREATE(fscdirvp, CACHEFS_FSINFO, attrp, EXCL,
835 			0600, &infovp, kcred, 0, NULL, NULL);
836 	if (error) {
837 		cmn_err(CE_WARN, "Can't create fs option file");
838 		goto out;
839 	}
840 	attrp->va_size = MAXBSIZE;
841 	attrp->va_mask = AT_SIZE;
842 	error = VOP_SETATTR(infovp, attrp, 0, kcred, NULL);
843 	if (error) {
844 		cmn_err(CE_WARN, "Can't set size of fsinfo file");
845 		goto out;
846 	}
847 
848 	/* write out the info file */
849 	fscp->fs_flags |= CFS_FS_DIRTYINFO;
850 	error = fscache_info_sync(fscp);
851 	if (error)
852 		goto out;
853 
854 	/*
855 	 * Install the symlink from cachefs_fsid -> directory.
856 	 */
857 	cid.cid_flags = 0;
858 	cid.cid_fileno = fsid;
859 	make_ascii_name(&cid, name);
860 	error = VOP_RENAME(cachep->c_dirvp, namep, cachep->c_dirvp,
861 		name, kcred, NULL, 0);
862 	if (error) {
863 		cmn_err(CE_WARN, "Can't rename cache directory");
864 		goto out;
865 	}
866 	attrp->va_mask = AT_MODE | AT_TYPE;
867 	attrp->va_mode = 0777;
868 	attrp->va_type = VLNK;
869 	error = VOP_SYMLINK(cachep->c_dirvp, namep, attrp, name, kcred, NULL,
870 	    0);
871 	if (error) {
872 		cmn_err(CE_WARN, "Can't create cache directory symlink");
873 		goto out;
874 	}
875 
876 	/*
877 	 * Finally, make the attrcache directory
878 	 */
879 	attrp->va_mode = S_IFDIR | 0777;
880 	attrp->va_uid = 0;
881 	attrp->va_gid = 0;
882 	attrp->va_type = VDIR;
883 	attrp->va_mask = AT_TYPE | AT_MODE | AT_UID | AT_GID;
884 	error = VOP_MKDIR(fscdirvp, ATTRCACHE_NAME, attrp, &attrvp, kcred, NULL,
885 	    0, NULL);
886 	if (error) {
887 		cmn_err(CE_WARN, "Can't create attrcache dir for fscache");
888 		goto out;
889 	}
890 
891 	mutex_enter(&fscp->fs_fslock);
892 	fscp->fs_cfsid = fsid;
893 	fscp->fs_fscdirvp = fscdirvp;
894 	fscp->fs_fsattrdir = attrvp;
895 	fscp->fs_infovp = infovp;
896 	mutex_exit(&fscp->fs_fslock);
897 
898 out:
899 
900 	if (error) {
901 		while (files-- > 0)
902 			cachefs_freefile(cachep);
903 		if (fscdirvp)
904 			VN_RELE(fscdirvp);
905 		if (blocks)
906 			cachefs_freeblocks(cachep, blocks, CACHEFS_RL_NONE);
907 		if (attrvp)
908 			VN_RELE(attrvp);
909 		if (infovp)
910 			VN_RELE(infovp);
911 	}
912 	if (attrp)
913 		cachefs_kmem_free(attrp, sizeof (struct vattr));
914 	return (error);
915 }
916 
917 /*
918  * Tries to find the fscache directory indicated by fsid.
919  */
920 int
921 fscdir_find(cachefscache_t *cachep, ino64_t fsid, fscache_t *fscp)
922 {
923 	int error;
924 	vnode_t *infovp = NULL;
925 	vnode_t *fscdirvp = NULL;
926 	vnode_t *attrvp = NULL;
927 	char dirname[CFS_FRONTFILE_NAME_SIZE];
928 	cfs_cid_t cid;
929 	cachefs_fsinfo_t fsinfo;
930 	caddr_t addr;
931 
932 	ASSERT(MUTEX_HELD(&cachep->c_fslistlock));
933 	ASSERT(fscp->fs_infovp == NULL);
934 	ASSERT(fscp->fs_fscdirvp == NULL);
935 	ASSERT(fscp->fs_fsattrdir == NULL);
936 
937 	/* convert the fsid value to the name of the directory */
938 	cid.cid_flags = 0;
939 	cid.cid_fileno = fsid;
940 	make_ascii_name(&cid, dirname);
941 
942 	/* try to find the directory */
943 	error = VOP_LOOKUP(cachep->c_dirvp, dirname, &fscdirvp, NULL,
944 			0, NULL, kcred, NULL, NULL, NULL);
945 	if (error)
946 		goto out;
947 
948 	/* this better be a directory or we are hosed */
949 	if (fscdirvp->v_type != VDIR) {
950 		cmn_err(CE_WARN, "cachefs: fscdir_find_a: cache corruption"
951 			" run fsck, %s", dirname);
952 		error = ENOTDIR;
953 		goto out;
954 	}
955 
956 	/* try to find the info file */
957 	error = VOP_LOOKUP(fscdirvp, CACHEFS_FSINFO, &infovp,
958 	    NULL, 0, NULL, kcred, NULL, NULL, NULL);
959 	if (error) {
960 		cmn_err(CE_WARN, "cachefs: fscdir_find_b: cache corruption"
961 			" run fsck, %s", dirname);
962 		goto out;
963 	}
964 
965 	/* read in info struct */
966 	addr = segmap_getmapflt(segkmap, infovp, (offset_t)0,
967 				MAXBSIZE, 1, S_READ);
968 
969 	/*LINTED alignment okay*/
970 	fsinfo = *(cachefs_fsinfo_t *)addr;
971 	error =  segmap_release(segkmap, addr, 0);
972 	if (error) {
973 		cmn_err(CE_WARN, "cachefs: fscdir_find_c: cache corruption"
974 			" run fsck, %s", dirname);
975 		goto out;
976 	}
977 
978 	/* try to find the attrcache directory */
979 	error = VOP_LOOKUP(fscdirvp, ATTRCACHE_NAME,
980 	    &attrvp, NULL, 0, NULL, kcred, NULL, NULL, NULL);
981 	if (error) {
982 		cmn_err(CE_WARN, "cachefs: fscdir_find_d: cache corruption"
983 			" run fsck, %s", dirname);
984 		goto out;
985 	}
986 
987 	mutex_enter(&fscp->fs_fslock);
988 	fscp->fs_info = fsinfo;
989 	fscp->fs_cfsid = fsid;
990 	fscp->fs_fscdirvp = fscdirvp;
991 	fscp->fs_fsattrdir = attrvp;
992 	fscp->fs_infovp = infovp;
993 	mutex_exit(&fscp->fs_fslock);
994 
995 out:
996 	if (error) {
997 		if (infovp)
998 			VN_RELE(infovp);
999 		if (fscdirvp)
1000 			VN_RELE(fscdirvp);
1001 	}
1002 	return (error);
1003 }
1004 
1005 /*
1006  * fscache_info_sync
1007  * Writes out the fs_info data if necessary.
1008  */
1009 static int
1010 fscache_info_sync(fscache_t *fscp)
1011 {
1012 	caddr_t addr;
1013 	int error = 0;
1014 
1015 	mutex_enter(&fscp->fs_fslock);
1016 
1017 	if (fscp->fs_cache->c_flags & CACHE_NOFILL) {
1018 		error = EROFS;
1019 		goto out;
1020 	}
1021 
1022 	/* if the data is dirty and we have the file vnode */
1023 	if ((fscp->fs_flags & CFS_FS_DIRTYINFO) && fscp->fs_infovp) {
1024 		addr = segmap_getmapflt(segkmap, fscp->fs_infovp, 0,
1025 					MAXBSIZE, 1, S_WRITE);
1026 
1027 		/*LINTED alignment okay*/
1028 		*(cachefs_fsinfo_t *)addr = fscp->fs_info;
1029 		error = segmap_release(segkmap, addr, SM_WRITE);
1030 
1031 		if (error) {
1032 			cmn_err(CE_WARN,
1033 			    "cachefs: Can not write to info file.");
1034 		} else {
1035 			fscp->fs_flags &= ~CFS_FS_DIRTYINFO;
1036 		}
1037 	}
1038 
1039 out:
1040 
1041 	mutex_exit(&fscp->fs_fslock);
1042 
1043 	return (error);
1044 }
1045 
1046 /*
1047  * ------------------------------------------------------------------
1048  *
1049  *		fscache_name_to_fsid
1050  *
1051  * Description:
1052  *	Takes the name of a cache and determines it corresponding
1053  *	fsid.
1054  * Arguments:
1055  *	cachep	cache object to find name of fs cache in
1056  *	namep	the name of the fs cache
1057  *	fsidp	set to the fsid if found
1058  * Returns:
1059  *	Returns 0 on success, !0 on error.
1060  * Preconditions:
1061  *	precond(cachep)
1062  *	precond(namep)
1063  *	precond(fsidp)
1064  */
1065 
1066 int
1067 fscache_name_to_fsid(cachefscache_t *cachep, char *namep, ino64_t *fsidp)
1068 {
1069 	int error;
1070 	char dirname[CFS_FRONTFILE_NAME_SIZE];
1071 	vnode_t *linkvp = NULL;
1072 	struct uio uio;
1073 	struct iovec iov;
1074 	ino64_t nodeid;
1075 	char *pd;
1076 	int xx;
1077 	int c;
1078 
1079 	/* get the vnode of the name */
1080 	error = VOP_LOOKUP(cachep->c_dirvp, namep, &linkvp, NULL, 0, NULL,
1081 		kcred, NULL, NULL, NULL);
1082 	if (error)
1083 		goto out;
1084 
1085 	/* the vnode had better be a link */
1086 	if (linkvp->v_type != VLNK) {
1087 		error = EINVAL;
1088 		goto out;
1089 	}
1090 
1091 	/* read the contents of the link */
1092 	iov.iov_len = CFS_FRONTFILE_NAME_SIZE;
1093 	iov.iov_base = dirname;
1094 	uio.uio_iov = &iov;
1095 	uio.uio_iovcnt = 1;
1096 	uio.uio_resid = iov.iov_len;
1097 	uio.uio_segflg = UIO_SYSSPACE;
1098 	uio.uio_loffset = 0;
1099 	uio.uio_fmode = 0;
1100 	uio.uio_extflg = UIO_COPY_CACHED;
1101 	error = VOP_READLINK(linkvp, &uio, kcred, NULL);
1102 	if (error) {
1103 		cmn_err(CE_WARN, "cachefs: Can't read filesystem cache link");
1104 		goto out;
1105 	}
1106 
1107 	/* convert the contents of the link to a ino64_t */
1108 	nodeid = 0;
1109 	pd = dirname;
1110 	for (xx = 0; xx < (CFS_FRONTFILE_NAME_SIZE - 2); xx++) {
1111 		nodeid <<= 4;
1112 		c = *pd++;
1113 		if (c <= '9')
1114 			c -= '0';
1115 		else if (c <= 'F')
1116 			c = c - 'A' + 10;
1117 		else
1118 			c = c - 'a' + 10;
1119 		nodeid += c;
1120 	}
1121 	*fsidp = nodeid;
1122 out:
1123 	if (linkvp)
1124 		VN_RELE(linkvp);
1125 
1126 	return (error);
1127 }
1128 
1129 
1130 /*
1131  * Suspends the thread until access to the cache is granted.
1132  * If !SOFT then
1133  *	waitconnected == 1 means wait until connected
1134  *	waitconnected == 0 means wait until connected or disconnected
1135  * else then
1136  *	wait until connected or disconnected
1137  * writing is set to 1 if writing, 0 if reading
1138  * Returns 0, EINTR, or ETIMEDOUT.
1139  */
1140 int
1141 cachefs_cd_access(fscache_t *fscp, int waitconnected, int writing)
1142 {
1143 	int nosig;
1144 	int error = 0;
1145 	cachefscache_t *cachep;
1146 	int waithappens = 0;
1147 	pid_t pid;
1148 
1149 	mutex_enter(&fscp->fs_cdlock);
1150 
1151 #ifdef CFS_CD_DEBUG
1152 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1153 #endif
1154 
1155 	for (;;) {
1156 		/* if we have to wait */
1157 		if (waithappens ||
1158 		    (waitconnected &&
1159 		    (fscp->fs_cdconnected != CFS_CD_CONNECTED))) {
1160 
1161 			/* do not make soft mounts wait until connected */
1162 			if ((waithappens == 0) && CFS_ISFS_SOFT(fscp)) {
1163 				error = ETIMEDOUT;
1164 				break;
1165 			}
1166 
1167 			/* wait for a wakeup or a signal */
1168 			nosig = cv_wait_sig(&fscp->fs_cdwaitcv,
1169 			    &fscp->fs_cdlock);
1170 
1171 			/* if we got a signal */
1172 			if (nosig == 0) {
1173 				error = EINTR;
1174 				break;
1175 			}
1176 
1177 			if (waitconnected &&
1178 			    (fscp->fs_cdconnected == CFS_CD_CONNECTED))
1179 				waitconnected = 0;
1180 
1181 			/* try again to get access */
1182 			waithappens = 0;
1183 			continue;
1184 		}
1185 
1186 		/* if transitioning modes */
1187 		if (fscp->fs_cdtransition) {
1188 			waithappens = 1;
1189 			continue;
1190 		}
1191 
1192 		/* if rolling the log */
1193 		if (fscp->fs_cdconnected == CFS_CD_RECONNECTING) {
1194 			pid = ttoproc(curthread)->p_pid;
1195 			cachep = fscp->fs_cache;
1196 
1197 			/* if writing or not the cachefsd */
1198 			if (writing ||
1199 			    ((fscp->fs_cddaemonid != pid) &&
1200 			    (cachep->c_rootdaemonid != pid))) {
1201 				waithappens = 1;
1202 				continue;
1203 			}
1204 		}
1205 
1206 		/* if the daemon is not running */
1207 		if (fscp->fs_cddaemonid == 0) {
1208 			/* if writing and not connected */
1209 			if (writing &&
1210 			    (fscp->fs_cdconnected != CFS_CD_CONNECTED)) {
1211 				waithappens = 1;
1212 				continue;
1213 			}
1214 		}
1215 
1216 		/*
1217 		 * Verify don't set wait for NFSv4 (doesn't support
1218 		 * disconnected behavior).
1219 		 */
1220 		ASSERT(!CFS_ISFS_BACKFS_NFSV4(fscp) ||
1221 				(waithappens == 0 && waitconnected == 0));
1222 
1223 		ASSERT(fscp->fs_cdrefcnt >= 0);
1224 		fscp->fs_cdrefcnt++;
1225 #ifdef CFS_CD_DEBUG
1226 		curthread->t_flag |= T_CD_HELD;
1227 #endif
1228 		break;
1229 	}
1230 	mutex_exit(&fscp->fs_cdlock);
1231 
1232 	return (error);
1233 }
1234 
1235 /*
1236  * Call to check if can have access after a cache miss has occurred.
1237  * Only read access is allowed, do not call this routine if want
1238  * to write.
1239  * Returns 1 if yes, 0 if no.
1240  */
1241 int
1242 cachefs_cd_access_miss(fscache_t *fscp)
1243 {
1244 	cachefscache_t *cachep;
1245 	pid_t pid;
1246 
1247 #ifdef CFS_CD_DEBUG
1248 	ASSERT(curthread->t_flag & T_CD_HELD);
1249 #endif
1250 
1251 	/* should not get called if connected */
1252 	ASSERT(fscp->fs_cdconnected != CFS_CD_CONNECTED);
1253 
1254 	/* if no back file system, then no */
1255 	if (fscp->fs_backvfsp == NULL)
1256 		return (0);
1257 
1258 	/* if daemon is not running, then yes */
1259 	if (fscp->fs_cddaemonid == 0) {
1260 		return (1);
1261 	}
1262 
1263 	pid = ttoproc(curthread)->p_pid;
1264 	cachep = fscp->fs_cache;
1265 
1266 	/* if daemon is running, only daemon is allowed to have access */
1267 	if ((fscp->fs_cddaemonid != pid) &&
1268 	    (cachep->c_rootdaemonid != pid)) {
1269 		return (0);
1270 	}
1271 
1272 	return (1);
1273 }
1274 
1275 /*
1276  * Releases an access to the file system.
1277  */
1278 void
1279 cachefs_cd_release(fscache_t *fscp)
1280 {
1281 	mutex_enter(&fscp->fs_cdlock);
1282 
1283 #ifdef CFS_CD_DEBUG
1284 	ASSERT(curthread->t_flag & T_CD_HELD);
1285 	curthread->t_flag &= ~T_CD_HELD;
1286 #endif
1287 	/* decriment hold on file system */
1288 	fscp->fs_cdrefcnt--;
1289 	ASSERT(fscp->fs_cdrefcnt >= 0);
1290 
1291 	/* Verify no connected state transitions for NFSv4 */
1292 	ASSERT(!CFS_ISFS_BACKFS_NFSV4(fscp) || fscp->fs_cdtransition == 0);
1293 
1294 	/* wake up cachefsd */
1295 	if ((fscp->fs_cdrefcnt == 0) && fscp->fs_cdtransition)
1296 		cv_broadcast(&fscp->fs_cdwaitcv);
1297 
1298 	mutex_exit(&fscp->fs_cdlock);
1299 }
1300 
1301 /*
1302  * Called when a network timeout error has occurred.
1303  * If connected, switches state to disconnected.
1304  */
1305 void
1306 cachefs_cd_timedout(fscache_t *fscp)
1307 {
1308 	int state;
1309 
1310 	/* nothing to do if not snr or not connected */
1311 	if (!CFS_ISFS_SNR(fscp) || (fscp->fs_cdconnected != CFS_CD_CONNECTED))
1312 		return;
1313 
1314 #ifdef CFS_CD_DEBUG
1315 	ASSERT((curthread->t_flag & T_CD_HELD) == 0);
1316 #endif
1317 
1318 	/* Verify no state changes done for NFSv4 */
1319 	ASSERT(CFS_ISFS_BACKFS_NFSV4(fscp) == 0);
1320 
1321 	state = CFS_FS_DISCONNECTED;
1322 	(void) cachefs_io_stateset(fscp->fs_rootvp, &state, NULL);
1323 }
1324