xref: /titanic_44/usr/src/uts/common/sys/fs/cachefs_fs.h (revision ba3594ba9b5dd4c846c472a8d657edcb7c8109ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2014 Garrett D'Amore <garrett@damore.org>
24  *
25  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
26  * Use is subject to license terms.
27  */
28 
29 #ifndef _SYS_FS_CACHEFS_FS_H
30 #define	_SYS_FS_CACHEFS_FS_H
31 
32 #include <sys/vnode.h>
33 #include <sys/vfs.h>
34 #include <sys/types.h>
35 #include <sys/types32.h>
36 #include <sys/t_lock.h>
37 #include <sys/thread.h>
38 #include <sys/kmem.h>
39 #include <sys/inttypes.h>
40 #include <sys/time_impl.h>
41 #include <sys/systm.h>
42 
43 #ifdef __cplusplus
44 extern "C" {
45 #endif
46 
#ifdef CFSDEBUG
/*
 * Debug categories.  Apart from ALL and NONE, every CFSDEBUG_* value is a
 * single bit; CFS_DEBUG() tests these bits against cachefsdebug.
 */
#define	CFSDEBUG_ALL		0xffffffff
#define	CFSDEBUG_NONE		0x0
#define	CFSDEBUG_GENERAL	0x00001
#define	CFSDEBUG_SUBR		0x00002
#define	CFSDEBUG_CNODE		0x00004
#define	CFSDEBUG_DIR		0x00008
#define	CFSDEBUG_STRICT		0x00010
#define	CFSDEBUG_VOPS		0x00020
#define	CFSDEBUG_VFSOP		0x00040
#define	CFSDEBUG_RESOURCE	0x00080
#define	CFSDEBUG_CHEAT		0x00100
#define	CFSDEBUG_INVALIDATE	0x00200
#define	CFSDEBUG_DLOG		0x00400
#define	CFSDEBUG_FILEGRP	0x00800
#define	CFSDEBUG_IOCTL		0x01000
#define	CFSDEBUG_FRONT		0x02000
#define	CFSDEBUG_BACK		0x04000
#define	CFSDEBUG_ALLOCMAP	0x08000
#define	CFSDEBUG_ASYNCPOP	0x10000
#define	CFSDEBUG_VOPS_NFSV4	0x20000

/* NOTE(review): defined with no value; its consumers are not visible here. */
#define	CFSCLEANFLAG

extern int cachefsdebug;

/*
 * Expands to a bare "if": the statement that follows the invocation runs
 * only when at least one bit of N is set in cachefsdebug.
 */
#define	CFS_DEBUG(N)    if (cachefsdebug & (N))
#endif /* CFSDEBUG */
75 
/*
 * Disabled: testing usage of cd_access and friends.
 * Note we steal an unused bit in t_flag.
 * This will certainly bite us later.
 */
#if 0 && defined(CFSDEBUG)
#define	CFS_CD_DEBUG
#define	T_CD_HELD	0x01000
#endif
87 
88 /*
89  * Note: in an RL debugging kernel, CFSVERSION is augmented by 100
90  *
91  * Version History:
92  *
93  * Beginning -- Solaris 2.3 and 2.4: 1
94  *
95  * In Solaris 2.5 alpha, the size of fid_t changed: 2
96  *
97  * In 2.6: Chart, RL pointers/idents became rl_entry: 3
98  *	added which RL list to attrcache header: 4
99  *
 * Large file support bumped the version to 6.
 *
 * Sequence numbers bumped the version to 7.
 *
 * A 64-bit on-disk cache will be version 8. Not yet supported.
105  */
106 
/* Change to "#if 1" to build with resource-list (RL) debugging. */
#if 0
#define	CFSRLDEBUG
#endif

/* RL debugging builds use version numbers augmented by 100. */
#ifndef CFSRLDEBUG
#define	CFSVERSION		7
#define	CFSVERSION64		8	/* 64-bit cache - not yet used */
#else /* CFSRLDEBUG */
#define	CFSVERSION		110
#define	CFSVERSION64		111	/* 64-bit cache - not yet used */
#endif /* CFSRLDEBUG */
118 
/* Default sizes and limits */
#define	DEF_FILEGRP_SIZE	256
#define	DEF_POP_SIZE		0x10000		/* 64K */
#define	CFS_FRONTFILE_NAME_SIZE	18
#define	CFS_MAXFREECNODES	20

/* Well-known names; CACHEFS_PREFIX_LEN is the length of CACHEFS_PREFIX */
#define	CACHEFS_PREFIX		".cfs_"
#define	CACHEFS_PREFIX_LEN	5
#define	CACHELABEL_NAME		".cfs_label"
#define	RESOURCE_NAME		".cfs_resource"
#define	CACHEFS_FSINFO		".cfs_fsinfo"
#define	ATTRCACHE_NAME		".cfs_attrcache"
#define	CACHEFS_LOSTFOUND_NAME	"lost+found"
#define	BACKMNT_NAME		".cfs_mnt_points"
#define	CACHEFS_LOCK_FILE	".cfs_lock"
#define	CACHEFS_DLOG_FILE	".cfs_dlog"
#define	CACHEFS_DMAP_FILE	".cfs_dmap"
#define	CACHEFS_MNT_FILE	".cfs_mnt"
#define	CACHEFS_UNMNT_FILE	".cfs_unmnt"
#define	LOG_STATUS_NAME		".cfs_logging"
#define	NOBACKUP_NAME		".nsr"
#define	ROOTLINK_NAME		"root"

/* Names used outside the cache directory */
#define	CACHEFS_BASETYPE	"cachefs" /* used in statvfs() */
#define	CACHEFSTAB		"/etc/cachefstab"
#define	CACHEFS_ROOTRUN		"/var/run"
#define	CACHEFS_LOCKDIR_PRE	".cachefs." /* used by mount(1M)/fsck(1M) */
144 
145 /*
146  * The options structure is passed in as part of the mount arguments.
147  * It is stored in the .options file and kept track of in the fscache
148  * structure.
149  */
150 struct cachefsoptions {
151 	uint_t		opt_flags;		/* mount flags */
152 	int		opt_popsize;		/* cache population size */
153 	int		opt_fgsize;		/* filegrp size, default 256 */
154 };
155 
156 typedef struct cachefscache cachefscache_t;
157 
158 /*
159  * all the stuff needed to manage a queue of requests to be processed
160  * by async threads.
161  */
162 struct cachefs_workq {
163 	struct cachefs_req	*wq_head;		/* head of work q */
164 	struct cachefs_req	*wq_tail;		/* tail of work q */
165 	int			wq_length;		/* # of requests on q */
166 	int			wq_thread_count;	/* # of threads */
167 	int			wq_max_len;		/* longest queue */
168 	int			wq_halt_request;	/* halt requested */
169 	unsigned int		wq_keepone:1;		/* keep one thread */
170 	unsigned int		wq_logwork:1;		/* write logfile */
171 	kcondvar_t		wq_req_cv;		/* wait on work to do */
172 	kcondvar_t		wq_halt_cv;		/* wait/signal halt */
173 	kmutex_t		wq_queue_lock;		/* protect queue */
174 	cachefscache_t		*wq_cachep;		/* sometimes NULL */
175 };
176 
177 /*
178  * cfs_cid is stored on disk, so it needs to be the same 32-bit vs. 64-bit.
179  */
180 
181 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
182 #pragma pack(4)
183 #endif
184 
185 /* identifies a file in the cache */
186 struct cfs_cid {
187 	ino64_t	cid_fileno;		/* fileno */
188 	int	cid_flags;		/* flags */
189 };
190 typedef struct cfs_cid cfs_cid_t;
191 #define	CFS_CID_LOCAL	1	/* local file */
192 
193 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
194 #pragma pack()
195 #endif
196 
197 /*
 * XX64 - for now, redefine all time_t fields that are used by both kernel
 * and user-space apps as a 32-bit quantity.
200  */
201 
202 #if (defined(_SYSCALL32) && defined(_LP64))
203 
204 /*
205  * The cfs_* types are used to represent on-disk data, since its size is
206  * independent of the kernel memory model (in the LP64 case)
207  */
208 typedef time32_t		cfs_time_t;
209 typedef timestruc32_t		cfs_timestruc_t;
210 typedef vattr32_t		cfs_vattr_t;
211 typedef fid32_t			cfs_fid_t;
212 
213 #define	cfs_timespec		timespec32
214 #define	cfs_vattr		vattr32
215 #define	cfs_fid			fid32
216 
/*
 * Copy between a dev_t and a dev32_t, compressing or expanding as needed.
 *
 * CACHEFS_DEV_TO_DEV32_COPY stores the compressed in_dev in out_dev and
 * sets "error" to EOVERFLOW when cmpldev() returns 0; otherwise "error"
 * is left untouched.
 * CACHEFS_DEV32_TO_DEV_COPY stores expldev(in_dev) in out_dev.
 *
 * Each macro expands to a single do { } while (0) statement, so it is
 * safe as the body of an unbraced if/else.  The caller supplies the
 * terminating semicolon.
 */
#define	CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)		\
	do {								\
		if (cmpldev((dev32_t *)&(out_dev), (in_dev)) == 0)	\
			(error) = EOVERFLOW;				\
	} while (0)

#define	CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)			\
	do {								\
		(out_dev) = (dev_t)expldev(in_dev);			\
	} while (0)
227 
/* True when tval cannot be represented as a 32-bit time value. */
#define	TIME_OVERFLOW(tval)						\
	((tval) < TIME32_MIN || (tval) > TIME32_MAX)

/*
 * Store in_tval in out_tval, then set "error" to EOVERFLOW if in_tval is
 * outside the 32-bit range ("error" is left untouched otherwise).  Note
 * that in_tval is evaluated more than once.
 *
 * Expands to one do { } while (0) statement so that the whole copy, not
 * just its first assignment, is governed by an enclosing unbraced "if".
 */
#define	CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)		\
	do {								\
		(out_tval) = (in_tval);					\
		if (TIME_OVERFLOW(in_tval))				\
			(error) = EOVERFLOW;				\
	} while (0)

/* Store the 32-bit in_tval in out_tval; cannot overflow. */
#define	CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)			\
	do {								\
		(out_tval) = (in_tval);					\
	} while (0)
239 
/*
 * Copy tv_nsec and tv_sec from *in_tsp (timestruc_t) to *out_tsp
 * (cfs_timestruc_t).  The seconds go through CACHEFS_TIME_TO_TIME32_COPY,
 * which is what may set "error".
 *
 * Expands to one do { } while (0) statement so both field copies are
 * governed by an enclosing unbraced "if".
 */
#define	CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)			\
	do {								\
		(out_tsp)->tv_nsec = (in_tsp)->tv_nsec;			\
		CACHEFS_TIME_TO_TIME32_COPY((in_tsp)->tv_sec,		\
		    (out_tsp)->tv_sec, error);				\
	} while (0)

/* Reverse direction: copy a cfs_timestruc_t into a timestruc_t. */
#define	CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)			\
	do {								\
		(out_tsp)->tv_nsec = (in_tsp)->tv_nsec;			\
		CACHEFS_TIME32_TO_TIME_COPY((in_tsp)->tv_sec,		\
		    (out_tsp)->tv_sec);					\
	} while (0)
248 
/*
 * CACHEFS_FID_COPY copies fid_len and the first fid_len bytes of fid_data
 * from *in_fidp to *out_fidp.  The length is not checked against the size
 * of the destination's fid_data.
 *
 * Expands to one do { } while (0) statement so that the bcopy() cannot
 * escape an enclosing unbraced "if".
 */
#define	CACHEFS_FID_COPY(in_fidp, out_fidp)				\
	do {								\
		(out_fidp)->fid_len = (in_fidp)->fid_len;		\
		bcopy((in_fidp)->fid_data, (out_fidp)->fid_data,	\
		    (in_fidp)->fid_len);				\
	} while (0)
253 
/*
 * Copy *in_vattrp (vattr_t) field by field into *out_vattrp (vattr32_t).
 * va_fsid and va_rdev go through CACHEFS_DEV_TO_DEV32_COPY and the three
 * timestamps through CACHEFS_TS_TO_TS32_COPY; those are the steps that may
 * set "error".  va_seq is always written as 0.
 *
 * Expands to one do { } while (0) statement so the whole copy is governed
 * by an enclosing unbraced "if".
 */
#define	CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)	\
	do {								\
		(out_vattrp)->va_mask = (in_vattrp)->va_mask;		\
		(out_vattrp)->va_type = (in_vattrp)->va_type;		\
		(out_vattrp)->va_mode = (in_vattrp)->va_mode;		\
		(out_vattrp)->va_uid = (in_vattrp)->va_uid;		\
		(out_vattrp)->va_gid = (in_vattrp)->va_gid;		\
		CACHEFS_DEV_TO_DEV32_COPY((in_vattrp)->va_fsid,		\
		    (out_vattrp)->va_fsid, error);			\
		(out_vattrp)->va_nodeid = (in_vattrp)->va_nodeid;	\
		(out_vattrp)->va_nlink = (in_vattrp)->va_nlink;		\
		(out_vattrp)->va_size = (in_vattrp)->va_size;		\
		CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_atime,		\
		    &(out_vattrp)->va_atime, error);			\
		CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_mtime,		\
		    &(out_vattrp)->va_mtime, error);			\
		CACHEFS_TS_TO_TS32_COPY(&(in_vattrp)->va_ctime,		\
		    &(out_vattrp)->va_ctime, error);			\
		CACHEFS_DEV_TO_DEV32_COPY((in_vattrp)->va_rdev,		\
		    (out_vattrp)->va_rdev, error);			\
		(out_vattrp)->va_blksize = (in_vattrp)->va_blksize;	\
		(out_vattrp)->va_nblocks = (in_vattrp)->va_nblocks;	\
		(out_vattrp)->va_seq = 0;				\
	} while (0)
276 
/*
 * Copy *in_vattrp (vattr32_t) field by field into *out_vattrp (vattr_t).
 * va_fsid and va_rdev go through CACHEFS_DEV32_TO_DEV_COPY and the three
 * timestamps through CACHEFS_TS32_TO_TS_COPY.  va_seq is always written
 * as 0.
 *
 * Expands to one do { } while (0) statement so the whole copy is governed
 * by an enclosing unbraced "if".
 */
#define	CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)		\
	do {								\
		(out_vattrp)->va_mask = (in_vattrp)->va_mask;		\
		(out_vattrp)->va_type = (in_vattrp)->va_type;		\
		(out_vattrp)->va_mode = (in_vattrp)->va_mode;		\
		(out_vattrp)->va_uid = (in_vattrp)->va_uid;		\
		(out_vattrp)->va_gid = (in_vattrp)->va_gid;		\
		CACHEFS_DEV32_TO_DEV_COPY((in_vattrp)->va_fsid,		\
		    (out_vattrp)->va_fsid);				\
		(out_vattrp)->va_nodeid = (in_vattrp)->va_nodeid;	\
		(out_vattrp)->va_nlink = (in_vattrp)->va_nlink;		\
		(out_vattrp)->va_size = (in_vattrp)->va_size;		\
		CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_atime,		\
		    &(out_vattrp)->va_atime);				\
		CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_mtime,		\
		    &(out_vattrp)->va_mtime);				\
		CACHEFS_TS32_TO_TS_COPY(&(in_vattrp)->va_ctime,		\
		    &(out_vattrp)->va_ctime);				\
		CACHEFS_DEV32_TO_DEV_COPY((in_vattrp)->va_rdev,		\
		    (out_vattrp)->va_rdev);				\
		(out_vattrp)->va_blksize = (in_vattrp)->va_blksize;	\
		(out_vattrp)->va_nblocks = (in_vattrp)->va_nblocks;	\
		(out_vattrp)->va_seq = 0;				\
	} while (0)
299 
300 #else /* not _SYSCALL32 && _LP64 */
301 
302 /*
303  * The cfs_* types are used to represent on-disk data, since its size is
304  * independent of the kernel memory model (in the LP64 case)
305  */
306 typedef time_t			cfs_time_t;
307 typedef timestruc_t		cfs_timestruc_t;
308 typedef vattr_t			cfs_vattr_t;
309 typedef fid_t			cfs_fid_t;
310 
311 #define	cfs_timespec		timespec
312 #define	cfs_vattr		vattr
313 #define	cfs_fid			fid
314 
/*
 * Native-size variants: source and destination have the same type, so
 * nothing can overflow and every copy is a plain assignment.  The "error"
 * argument is accepted for interface parity and is never touched.
 */
#define	TIME_OVERFLOW(tval)	FALSE

/* dev_t copies */
#define	CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)		\
	out_dev = (in_dev)
#define	CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)			\
	out_dev = (in_dev)

/* time_t copies */
#define	CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)		\
	out_tval = (in_tval)
#define	CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)			\
	out_tval = (in_tval)

/* timestruc_t copies (structure assignment through the pointers) */
#define	CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)			\
	*(out_tsp) = *(in_tsp)
#define	CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)			\
	*(out_tsp) = *(in_tsp)

/* fid copy (structure assignment through the pointers) */
#define	CACHEFS_FID_COPY(in_fidp, out_fidp)				\
	*(out_fidp) = *(in_fidp)
337 
/*
 * Native-size vattr copies: assign the whole structure, then clear va_seq
 * in the destination.  "error" is accepted for interface parity and is
 * never touched.
 *
 * Each macro expands to one do { } while (0) statement so that the va_seq
 * store cannot escape an enclosing unbraced "if".
 */
#define	CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)	\
	do {								\
		*(out_vattrp) = *(in_vattrp);				\
		(out_vattrp)->va_seq = 0;				\
	} while (0)

#define	CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)		\
	do {								\
		*(out_vattrp) = *(in_vattrp);				\
		(out_vattrp)->va_seq = 0;				\
	} while (0)
345 
346 #endif /* _SYSCALL32 && _LP64 */
347 
348 /*
349  * The "cfs_*" structs below refer to the on-disk structures. Presently
350  * they are 32-bit based. When they change to 64-bit, we'd have to modify the
351  * macros below accordingly.
352  */
/* dev_t <-> on-disk dev */
#define	CACHEFS_DEV_TO_CFS_DEV_COPY(in_dev, out_dev, error)		\
	CACHEFS_DEV_TO_DEV32_COPY(in_dev, out_dev, error)
#define	CACHEFS_CFS_DEV_TO_DEV_COPY(in_dev, out_dev)		\
	CACHEFS_DEV32_TO_DEV_COPY(in_dev, out_dev)

/* time_t <-> on-disk time */
#define	CACHEFS_TIME_TO_CFS_TIME_COPY(in_tval, out_tval, error)		\
	CACHEFS_TIME_TO_TIME32_COPY(in_tval, out_tval, error)
#define	CACHEFS_CFS_TIME_TO_TIME_COPY(in_tval, out_tval)		\
	CACHEFS_TIME32_TO_TIME_COPY(in_tval, out_tval)

/* timestruc_t <-> on-disk timestruc */
#define	CACHEFS_TS_TO_CFS_TS_COPY(in_tsp, out_tsp, error)		\
	CACHEFS_TS_TO_TS32_COPY(in_tsp, out_tsp, error)
#define	CACHEFS_CFS_TS_TO_TS_COPY(in_tsp, out_tsp)			\
	CACHEFS_TS32_TO_TS_COPY(in_tsp, out_tsp)

/* vattr_t <-> on-disk vattr */
#define	CACHEFS_VATTR_TO_CFS_VATTR_COPY(in_vattrp, out_vattrp, error)	\
	CACHEFS_VATTR_TO_VATTR32_COPY(in_vattrp, out_vattrp, error)
#define	CACHEFS_CFS_VATTR_TO_VATTR_COPY(in_vattrp, out_vattrp)		\
	CACHEFS_VATTR32_TO_VATTR_COPY(in_vattrp, out_vattrp)
376 
377 #include <sys/fs/cachefs_fscache.h>
378 #include <sys/fs/cachefs_filegrp.h>
379 
380 /*
381  * One cache_label structure per cache. Contains mainly user defined or
382  * default values for cache resource management. Contents is static.
383  * The value cl_maxfiles is not used any where in cachefs code. If and when
384  * this is really used the cl_maxfiles should be declared as a 64bit value
385  * for large file support.
386  * The maxblks, blkhiwat, blklowat, blocktresh, blockmin, may need to be
387  * 64bit values when we actually start supporting file systems of size
388  * greater than 1 terabyte.
389  */
struct cache_label {
	int	cl_cfsversion;	/* cfs version number */

	/* block limits */
	int	cl_maxblks;	/* max blocks to be used by cache */
	int	cl_blkhiwat;	/* high water-mark for block usage */
	int	cl_blklowat;	/* low water-mark for block usage */

	/* inode limits */
	int	cl_maxinodes;	/* max inodes to be used by cache */
	int	cl_filehiwat;	/* high water-mark for inode usage */
	int	cl_filelowat;	/* low water-mark for inode usage */

	/* usage thresholds */
	int	cl_blocktresh;	/* block max usage threshold */
	int	cl_blockmin;	/* block min usage threshold */
	int	cl_filetresh;	/* inode max usage threshold */
	int	cl_filemin;	/* inode min usage threshold */

	int	cl_maxfiles;	/* max cache file size */
};
404 
405 /*
406  * One cache_usage structure per cache. Keeps track of cache usage figures.
407  * Contents gets updated frequently.
408  */
409 struct cache_usage {
410 	int	cu_blksused;	/* actual number of blocks used */
411 	int	cu_filesused;	/* actual number of files used */
412 	uint_t	cu_flags;	/* Cache state flags */
413 	ushort_t cu_unique;	/* Fid persistent uniquifier */
414 };
415 
416 #define	CUSAGE_ACTIVE	1	/* Cache is active */
417 #define	CUSAGE_NEED_ADJUST 2	/* Adjust uniquifier before assigning new fid */
418 
419 /*
420  * RL list identifiers.
421  */
enum cachefs_rl_type {
	CACHEFS_RL_NONE = 0x101,
	CACHEFS_RL_FREE,
	CACHEFS_RL_GC,
	CACHEFS_RL_ACTIVE,
	CACHEFS_RL_ATTRFILE,
	CACHEFS_RL_MODIFIED,
	CACHEFS_RL_PACKED,
	CACHEFS_RL_PACKED_PENDING,
	CACHEFS_RL_MF
};

/*
 * CACHEFS_RL_START and CACHEFS_RL_END are the first and last enumerators,
 * CACHEFS_RL_CNT is the number of enumerators, and CACHEFS_RL_INDEX maps
 * an enumerator to a zero-based index (CACHEFS_RL_NONE maps to 0).
 *
 * The argument of CACHEFS_RL_INDEX is parenthesized so that an expression
 * argument (for example a conditional expression) is evaluated as a whole
 * before CACHEFS_RL_START is subtracted.
 */
#define	CACHEFS_RL_START CACHEFS_RL_NONE
#define	CACHEFS_RL_END CACHEFS_RL_MF
#define	CACHEFS_RL_CNT	(CACHEFS_RL_END - CACHEFS_RL_START + 1)
#define	CACHEFS_RL_INDEX(X)	((X) - CACHEFS_RL_START)
437 
438 struct cachefs_rl_listhead {
439 	uint_t		rli_front;		/* front of list */
440 	uint_t		rli_back;		/* back of list */
441 	int		rli_blkcnt;		/* number of 8k blocks */
442 	int		rli_itemcnt;		/* number of items on list */
443 };
444 typedef struct cachefs_rl_listhead cachefs_rl_listhead_t;
445 
446 /*
447  * Resource List information.  One per cache.
448  */
449 struct cachefs_rl_info {
450 	uint_t		rl_entries;	/* number of entries allocated in rl */
451 	cfs_time_t	rl_gctime;	/* time of item on front of gc list */
452 
453 	/* heads of the various lists */
454 	cachefs_rl_listhead_t	rl_items[CACHEFS_RL_CNT];
455 };
456 typedef struct cachefs_rl_info cachefs_rl_info_t;
457 
458 /*
459  * rl_debug and rl_entry are stored on disk, so they need to be
460  * the same 32-bit vs. 64-bit.
461  */
462 
463 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
464 #pragma pack(4)
465 #endif
466 
467 #ifdef CFSRLDEBUG
468 /*
469  * RL debugging thingy
470  */
471 
472 #define	CACHEFS_RLDB_STACKSIZE	16
473 #define	CACHEFS_RLDB_DEF_MAXCOUNT 5
474 
475 typedef struct rl_debug {
476 	hrtime_t db_hrtime;
477 
478 	uint_t db_attrc: 1;
479 	uint_t db_fsck: 1;
480 	ino64_t db_fsid;
481 	ino64_t db_fileno;
482 	enum cachefs_rl_type db_current;
483 
484 	int db_stackheight;
485 	pc_t db_stack[CACHEFS_RLDB_STACKSIZE];
486 
487 	struct rl_debug *db_next;
488 } rl_debug_t;
489 
490 extern time_t cachefs_dbvalid;
491 extern struct kmem_cache *cachefs_rl_debug_cache;
492 extern kmutex_t cachefs_rl_debug_mutex;
493 #endif /* CFSRLDEBUG */
494 
495 /*
496  * RL Entry type.
497  */
498 
499 typedef struct rl_entry {
500 	uint_t rl_attrc: 1;
501 	uint_t rl_fsck: 1; /* used by fsck; true => rl_current is correct */
502 	uint_t rl_local: 1; /* 1 means a local file */
503 
504 #ifdef CFSRLDEBUG
505 	cfs_time_t rl_dbvalid; /* this == cachefs_dbvalid => trust rl_debug */
506 	rl_debug_t *rl_debug;
507 #endif /* CFSRLDEBUG */
508 
509 	ino64_t rl_fsid;
510 	ino64_t rl_fileno;
511 
512 	enum cachefs_rl_type rl_current;
513 	uint_t rl_fwd_idx;
514 	uint_t rl_bkwd_idx;
515 } rl_entry_t;
516 
517 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
518 #pragma pack()
519 #endif
520 
521 /*
522  * rl entries per MAXBSIZE chunk.  rl_entry_t's size need not divide
523  * MAXBSIZE, as long as this constant is an integer (through integer
524  * division) (see cachefs_rl_entry_get()).
525  */
526 
/* Whole rl_entry_t's that fit in MAXBSIZE bytes (integer division). */
#define	CACHEFS_RLPMBS	(MAXBSIZE / (uint_t)sizeof (rl_entry_t))
528 
529 /*
530  * struct cache contains cache-wide information, and provides access
531  * to lower level info. There is one cache structure per cache.
532  */
533 struct cachefscache {
534 	struct cachefscache	*c_next;	/* list of caches */
535 	uint_t			c_flags;	/* misc flags */
536 	struct cache_label	c_label;	/* cache resource info */
537 	struct cache_usage	c_usage;	/* cache usage info */
538 	struct cachefs_rl_info	c_rlinfo;	/* rl global pointers */
539 	struct vnode		*c_resfilevp;	/* resource file vp */
540 	uint_t			c_rl_window;	/* window mapped in */
541 	rl_entry_t		*c_rl_entries;	/* mapping for rl entries */
542 	struct vnode		*c_dirvp;	/* cache directory vp */
543 	struct vnode		*c_lockvp;	/* lock file vp */
544 	struct vnode		*c_lostfoundvp;	/* lost+found directory vp */
545 	int			c_refcnt;	/* active fs ref count */
546 	struct fscache		*c_fslist;	/* fscache list head */
547 	struct cachefs_workq	c_workq;	/* async work */
548 	kmutex_t		c_contentslock; /* protect cache struct */
549 	kmutex_t		c_fslistlock;	/* protect fscache list */
550 	kmutex_t		c_mflock;	/* protect modified fixes */
551 	ushort_t		c_unique;	/* In core fid uniquifier */
552 	kcondvar_t		c_cwcv;		/* gc wait on work to do */
553 	kcondvar_t		c_cwhaltcv;	/* wait on gc thread exit */
554 	uint_t			c_gc_count;	/* garbage collection count */
555 	time_t			c_gc_time;	/* last garbage collection */
556 	time_t			c_gc_before;	/* atime of front before gc */
557 	time_t			c_gc_after;	/* atime of front after gc */
558 	uint_t			c_apop_inqueue;	/* # async pops queued */
559 	pid_t			c_rootdaemonid;	/* pid of root cachefsd */
560 	struct cachefs_log_cookie
561 				*c_log;		/* in-core logging stuff */
562 	struct cachefs_log_control
563 				*c_log_ctl;	/* on-disk logging stuff */
564 	kmutex_t		c_log_mutex;	/* protects c_log* */
565 };
566 
567 extern struct kmem_cache *cachefs_cache_kmcache;
568 
569 #define	CACHEFS_MAX_APOP_INQUEUE	50	/* default value for below */
570 extern uint_t cachefs_max_apop_inqueue;		/* max populations pending */
571 
572 /*
573  * Various cache structure flags.
574  */
#define	CACHE_NOCACHE		0x001	/* all cache refs go to back fs */
#define	CACHE_ALLOC_PENDING	0x004	/* Allocation pending */
#define	CACHE_NOFILL		0x008	/* No fill mode */
#define	CACHE_GARBAGE_COLLECT	0x010	/* Garbage collect in progress */
#define	CACHE_CACHEW_THREADRUN	0x020	/* Cachep worker thread is alive */
#define	CACHE_CACHEW_THREADEXIT 0x040	/* cachew thread should exit */
#define	CACHE_DIRTY		0x080
#define	CACHE_PACKED_PENDING	0x100	/* Packed pending work to do */
#define	CACHE_CHECK_RLTYPE	0x200	/* double-check with resource lists */
584 
585 /*
586  * Values for the mount options flag, opt_flags.
587  */
588 /*
589  * Mount options
590  */
#define	CFS_WRITE_AROUND	0x0001	/* write-around */
#define	CFS_NONSHARED		0x0002	/* write to cache and back file */
#define	CFS_NOCONST_MODE	0x0008	/* no-op consistency mode */
#define	CFS_ACCESS_BACKFS	0x0010	/* pass VOP_ACCESS to backfs */
#define	CFS_CODCONST_MODE	0x0080	/* cod consistency mode */
#define	CFS_DISCONNECTABLE	0x0100	/* server not responding option */
#define	CFS_SOFT		0x0200	/* soft mounted */
#define	CFS_NOACL		0x0400	/* ACLs are disabled in this fs */
#define	CFS_LLOCK		0x0800	/* use local file/record locks */
#define	CFS_SLIDE		0x1000	/* slide backfs under cachefs */
#define	CFS_NOFILL		0x2000	/* start in nofill mode */
#define	CFS_BACKFS_NFSV4	0x4000	/* back filesystem is NFSv4 */

#define	MAXCOOKIE_SIZE	36

#define	C_BACK_CHECK	0x2
607 
608 /*
609  * Macro to determine if this is a snr error where we should do a
610  * state transition.
611  */
612 
/*
 * Nonzero when ERROR is ETIMEDOUT or EIO and CFS_ISFS_SNR(FSCP) is true.
 * Every use of ERROR is parenthesized so that an expression argument is
 * evaluated as a unit; note that ERROR is evaluated more than once.
 */
#define	CFS_TIMEOUT(FSCP, ERROR) \
	((ERROR) && CFS_ISFS_SNR(FSCP) && \
	(((ERROR) == ETIMEDOUT) || ((ERROR) == EIO)))
616 
617 /*
618  * Macros to assert that cachefs fscache and cnode are in
619  * sync with NFSv4. Note that NFSv4 always passes-through
620  * the vnode calls directly to the backfilesystem. For
621  * this to work:
622  * (1) cachefs is always setup for connected operation,
623  * (2) cachefs options (example disconnectable (snr), nonshared, etc)
624  *     are disabled, and
625  * (3) the back filesystem vnode pointer always exists
626  *      (except after a remove operation)
627  * (4) the front filesystem vnode pointer is always NULL.
628  */
#ifdef DEBUG
/*
 * Each macro expands to a braced "if" statement and to nothing at all in
 * non-DEBUG builds.
 *
 * CFS_BACKFS_NFSV4_ASSERT_CNODE tests CFS_ISFS_BACKFS_NFSV4() on a
 * variable named "fscp", which is NOT a macro parameter: the caller must
 * have a suitable "fscp" in scope.  The cnode checks run under
 * c_statelock, which the macro takes and drops itself unless
 * MUTEX_HELD() reports that it is already held.
 */
#define	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp) \
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
		ASSERT((fscp)->fs_info.fi_mntflags == CFS_BACKFS_NFSV4); \
		ASSERT((fscp)->fs_cdconnected == CFS_CD_CONNECTED); \
	}
#define	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp) \
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
		if (MUTEX_HELD(&(cp)->c_statelock)) { \
			ASSERT((cp)->c_backvp != NULL || \
				((cp)->c_flags & CN_DESTROY) != 0); \
			ASSERT((cp)->c_frontvp == NULL); \
		} else { \
			mutex_enter(&(cp)->c_statelock); \
			ASSERT((cp)->c_backvp != NULL || \
				((cp)->c_flags & CN_DESTROY) != 0); \
			ASSERT((cp)->c_frontvp == NULL); \
			mutex_exit(&(cp)->c_statelock); \
		} \
	}
#else
#define	CFS_BACKFS_NFSV4_ASSERT_FSCACHE(fscp)
#define	CFS_BACKFS_NFSV4_ASSERT_CNODE(cp)
#endif	/* DEBUG */
653 
/*
 * Debug print for NFSv4 back file systems.  "x" is a complete,
 * parenthesized printf argument list.  The print happens only when
 * CFS_ISFS_BACKFS_NFSV4(fscp) is true and CFS_DEBUG(CFSDEBUG_VOPS_NFSV4)
 * passes; without CFSDEBUG the macro expands to nothing.
 */
#ifndef CFSDEBUG
#define	CFS_DPRINT_BACKFS_NFSV4(fscp, x)
#else
#define	CFS_DPRINT_BACKFS_NFSV4(fscp, x) \
	if (CFS_ISFS_BACKFS_NFSV4(fscp)) { \
		CFS_DEBUG(CFSDEBUG_VOPS_NFSV4) \
			printf x; \
	}
#endif /* CFSDEBUG */
663 
664 /*
665  * cachefs_allocmap and cfs_cachefs_metadata are stored on disk,
666  * so they need to be the same 32-bit vs. 64-bit.
667  */
668 
669 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
670 #pragma pack(4)
671 #endif
672 
673 /*
 * Large file support. The start offset of the cached file can be
 * greater than 2GB and by coalescing the different chunks we may
 * end up having a chunk of size > 2GB.
677  */
678 
679 struct cachefs_allocmap {
680 	u_offset_t		am_start_off;	/* Start offset of this chunk */
681 	u_offset_t		am_size;	/* size of this chunk */
682 };
683 
684 #define	C_MAX_ALLOCINFO_SLOTS	32
685 
686 /*
687  * CFS fastsymlinks. For symlink of size < C_FSL_SIZE, the symlink
688  * is stored in the cnode allocmap array.
689  */
690 #define	C_FSL_SIZE	(sizeof (struct cachefs_allocmap) * \
691 			C_MAX_ALLOCINFO_SLOTS)
692 
693 /*
694  * Structure representing a cached object in memory.
695  */
696 struct cachefs_metadata {
697 	struct vattr		md_vattr;	/* attributes */
698 	o_mode_t		md_aclclass;	/* CLASS_OBJ perm for ACL */
699 	ushort_t		md_pad1;	/* compiler padding */
700 	fid_t			md_cookie;	/* back fid */
701 	int			md_flags;	/* various flags */
702 	uint_t			md_rlno;	/* rl entry */
703 	enum cachefs_rl_type	md_rltype;	/* rl type */
704 	int			md_consttype;	/* type of consistency */
705 	fid_t			md_fid;		/* fid of front file */
706 	uint_t			md_frontblks;	/* # blks used in frontfs */
707 	uint_t			md_gen;		/* fid uniquifier */
708 	struct cfs_cid		md_parent;	/* id of parent */
709 	timestruc_t		md_timestamp;	/* front file timestamp */
710 	timestruc_t		md_x_time;	/* see consistency routines */
711 	timestruc_t		md_localmtime;	/* persistent local mtime */
712 	timestruc_t		md_localctime;	/* persistent local ctime */
713 	uint_t			md_resettimes;	/* when to reset local times */
714 	ino64_t			md_localfileno;	/* persistent local inum */
715 	uint_t			md_resetfileno;	/* when to reset local fileno */
716 	uint_t			md_seq;		/* seq number for putpage */
717 	int			md_allocents;	/* nbr of entries in allocmap */
718 	struct cachefs_allocmap	md_allocinfo[C_MAX_ALLOCINFO_SLOTS];
719 };
720 typedef struct cachefs_metadata cachefs_metadata_t;
721 
722 #if (defined(_SYSCALL32) && defined(_LP64))
723 
724 /*
725  * fid_t is long aligned, so user fid could be only 4 byte aligned.
726  * Since vnode/vfs calls require fid_t (which would be 8 byte aligned in
727  * _LP64), we would have to copy the user's value (and on-disk data) in/out.
728  */
729 /* on-disk metadata structure - fid aligned to int, time is 32-bit */
730 
731 struct cfs_cachefs_metadata {
732 	struct cfs_vattr	md_vattr;	/* attributes */
733 	o_mode_t		md_aclclass;	/* CLASS_OBJ perm for ACL */
734 	cfs_fid_t		md_cookie;	/* back fid */
735 	int			md_flags;	/* various flags */
736 	uint_t			md_rlno;	/* rl entry */
737 	enum cachefs_rl_type	md_rltype;	/* rl type */
738 	int			md_consttype;	/* type of consistency */
739 	cfs_fid_t		md_fid;		/* fid of front file */
740 	uint_t			md_frontblks;	/* # blks used in frontfs */
741 	uint_t			md_gen;		/* fid uniquifier */
742 	struct cfs_cid		md_parent;	/* id of parent */
743 	cfs_timestruc_t		md_timestamp;	/* front file timestamp */
744 	cfs_timestruc_t		md_x_time;	/* see consistency routines */
745 	cfs_timestruc_t		md_localmtime;	/* persistent local mtime */
746 	cfs_timestruc_t		md_localctime;	/* persistent local ctime */
747 	uint_t			md_resettimes;	/* when to reset local times */
748 	ino64_t			md_localfileno;	/* persistent local inum */
749 	uint_t			md_resetfileno;	/* when to reset local fileno */
750 	uint_t			md_seq;		/* seq number for putpage */
751 	int			md_allocents;	/* nbr of entries in allocmap */
752 	struct cachefs_allocmap	md_allocinfo[C_MAX_ALLOCINFO_SLOTS];
753 };
754 typedef struct cfs_cachefs_metadata cfs_cachefs_metadata_t;
755 
756 #else /* not _SYSCALL32 && _LP64 */
757 
758 typedef cachefs_metadata_t	cfs_cachefs_metadata_t;
759 
760 #define	cfs_cachefs_metadata	cachefs_metadata
761 
762 #endif /* _SYSCALL32 && _LP64 */
763 
764 #if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4
765 #pragma pack()
766 #endif
767 
768 /*
769  * Various flags to be stored in md_flags field of the metadata.
770  */
#define	MD_CREATEDONE	0x00001		/* create was done to backfs */
#define	MD_POPULATED	0x00002		/* front file or dir is populated */
#define	MD_FILE		0x00004		/* front file or dir exists */
#define	MD_FASTSYMLNK	0x00008		/* fast symbolic link */
#define	MD_PACKED	0x00010		/* file is packed */
#define	MD_INVALREADDIR	0x00040		/* repopulate on readdir */
#define	MD_PUTPAGE	0x00200		/* we have already logged a putpage */
#define	MD_FREE		0x00400		/* not used */
#define	MD_PUSHDONE	0x00800		/* set if file pushed to back fs */
#define	MD_MAPPING	0x01000		/* set if cid mapping space written */
#define	MD_ACL		0x02000		/* file has a cached acl */
#define	MD_ACLDIR	0x04000		/* front `dir' exists for holding acl */
#define	MD_LOCALMTIME	0x08000		/* do not overwrite md_localmtime */
#define	MD_LOCALCTIME	0x10000		/* do not overwrite md_localctime */
#define	MD_LOCALFILENO	0x20000		/* do not overwrite md_localfileno */
#define	MD_NEEDATTRS	0x40000		/* new attrs needed at next check */

/* Size of the cfs_cacheid array in the mount argument structures. */
#define	C_MAX_MOUNT_FSCDIRNAME		128
789 /*
790  * cachefs mount structure and related data
791  */
792 struct cachefs_mountargs {
793 	struct cachefsoptions	cfs_options;	/* consistency modes, etc. */
794 	char			*cfs_fsid;	/* CFS ID fpr file system */
795 	char			cfs_cacheid[C_MAX_MOUNT_FSCDIRNAME];
796 	/* CFS fscdir name */
797 	char			*cfs_cachedir;	/* path for this cache dir */
798 	char			*cfs_backfs;	/* back filesystem dir */
799 	uint_t			cfs_acregmin;	/* same as nfs values */
800 	uint_t			cfs_acregmax;
801 	uint_t			cfs_acdirmin;
802 	uint_t			cfs_acdirmax;
803 	char			*cfs_hostname;  /* server name */
804 	char			*cfs_backfsname; /* back filesystem name */
805 };
806 
#ifdef _SYSCALL32
/*
 * Same fields as struct cachefs_mountargs, with each pointer held as a
 * caddr32_t and each uint_t as a uint32_t.
 */
struct cachefs_mountargs32 {
	struct cachefsoptions	cfs_options;	/* consistency modes, etc. */
	caddr32_t		cfs_fsid;	/* CFS ID for file system */
	/* CFS fscdir name */
	char			cfs_cacheid[C_MAX_MOUNT_FSCDIRNAME];
	caddr32_t		cfs_cachedir;	/* path of this cache dir */
	caddr32_t		cfs_backfs;	/* back filesystem dir */

	/* same as nfs values */
	uint32_t		cfs_acregmin;
	uint32_t		cfs_acregmax;
	uint32_t		cfs_acdirmin;
	uint32_t		cfs_acdirmax;

	caddr32_t		cfs_hostname;  /* server name */
	caddr32_t		cfs_backfsname; /* back filesystem name */
};
#endif /* _SYSCALL32 */
823 
824 /*
825  * struct cachefsops - consistency modules.
826  */
/*
 * Operations vector for a consistency module.  The members are declared
 * without parameter lists, so calls through them are not type-checked.
 */
struct cachefsops {
	int	(*co_init_cobject)();
	int	(*co_check_cobject)();
	void	(*co_modify_cobject)();
	void	(*co_invalidate_cobject)();
	void	(*co_convert_cobject)();
};
834 
835 
836 
837 /*
838  * The attrcache file consists of a attrcache_header structure and an
839  * array of attrcache_slot structures (one per front file).
840  */
841 
842 /*
843  * Attrcache file format
844  *
845  *	Header
846  *	Offset array (# of entries = file group size)
847  *	alloc list	(1 bit per entry, 0 = free) Note that the
848  *			file will be extended as needed
849  *	attrcache entries
850  *
851  */
852 struct attrcache_header {
853 	uint_t		ach_count;		/* number of entries */
854 	int		ach_nffs;		/* number of front files */
855 	int		ach_nblks;		/* number of allocated blocks */
856 	uint_t		ach_rlno;		/* rl entry for this file */
857 	enum cachefs_rl_type ach_rl_current;	/* which list we're on */
858 };
859 
860 /*
861  * We assume that the seek offset to metadata will never be > 2GB.
862  * The filegrp size is 256 and the current calculations of the sizes
863  * of the data structures show that the ach_offset value here will not
864  * be > 2GB.
865  */
866 
867 struct attrcache_index {
868 	uint_t	ach_written:1;		/* 1 if metadata written */
869 	uint_t	ach_offset:31;		/* seek offset to metadata */
870 };
871 
872 /*
873  * cnode structure, one per file.
874  */
/* Shorthand member paths into the cnode's c_metadata and c_id members. */
#define	c_attr			c_metadata.md_vattr
#define	c_cookie		c_metadata.md_cookie
#define	c_fileno		c_id.cid_fileno
878 
879 /*
880  * LOCKS:	c_rwlock	Read / Write serialization
881  *		c_statelock	Protects most other fields in the cnode
882  *		c_popcv		Condvar used to prevent routines from nuking
883  *				a cnode which is currently being populated.
884  *				Threads blocked on it will be woken when the
885  *				populate completes.
886  *		c_iocv		broadcast, but never waited on - unused?
887  *		c_iomutex	Protects c_nio and c_ioflags
888  *
889  * Fields protected by other locks:
890  *
891  *		c_next		fg_cnodelock in the filegrp struct
892  *		c_idleback	fs_idlelock in fscache struct
893  *		c_idlefront	fs_idlelock in fscache struct
894  *
895  * Large File support: c_size is a u_offset_t and c_apopoffset is an
896  * offset_t.
897  */
898 struct cnode {
899 	int		c_flags;	/* CN_* (and CDIRTY) flags, see below */
900 	struct cnode	*c_next;	/* next cnode in fgp list */
901 	struct cnode	*c_idleback;	/* idle list back ptr */
902 	struct cnode	*c_idlefront;	/* idle list front ptr */
903 	struct vnode	*c_frontvp;	/* front vnode pointer */
904 	struct vnode	*c_backvp;	/* back vnode pointer */
905 	struct vnode	*c_acldirvp;	/* dir for storing dflt ACL */
906 	u_offset_t	c_size;		/* client view of the size */
907 	struct filegrp	*c_filegrp;	/* back pointer to filegrp */
908 	struct cfs_cid	c_id;		/* unique file number */
909 	int		c_invals;	/* # of recent dir invals */
910 	int		c_usage;	/* Usefulness of cache */
911 	struct vnode	*c_vnode;	/* pointer to vnode */
912 	struct cachefs_metadata	c_metadata;	/* cookie, ... */
913 	int		c_error;
914 	kmutex_t	c_statelock;	/* protects most other fields */
915 	krwlock_t	c_rwlock;	/* serialize write/setattr requests */
916 	kcondvar_t	c_popcv;	/* cnode populate cond var. */
917 	kthread_id_t	c_popthrp;	/* threadp performing pop */
918 	vnode_t		*c_unldvp;	/* dir to unlink in */
919 	char		*c_unlname;	/* name to unlink */
920 	cred_t		*c_unlcred;	/* creds for unlink */
921 	int		c_nio;		/* Number of io's pending */
922 	uint_t		c_ioflags;	/* CIO_* flags; under c_iomutex */
923 	kcondvar_t	c_iocv;		/* IO cond var. */
924 	kmutex_t	c_iomutex;	/* protects c_nio and c_ioflags */
925 	cred_t		*c_cred;
926 	int		c_ipending;	/* 1 if inactive is pending */
927 	int		c_mapcnt;	/* number of mapped blocks */
928 	offset_t	c_apopoffset;	/* offset for async pop */
929 	uint_t		c_apoplen;	/* length for async pop */
930 	u_offset_t	c_modaddr;	/* writepage offset */
931 	int		c_rdcnt;	/* # of read opens for backvp */
932 	int		c_wrcnt;	/* # of write opens for backvp */
933 };
934 typedef struct cnode cnode_t;
935 
936 extern struct kmem_cache *cachefs_cnode_cache;	/* defined outside this header */
937 
938 /*
939  * Directory caching parameters - First cut (ENABLE is INVAL times COST).
940  */
941 #define	CFS_DIRCACHE_COST	3
942 #define	CFS_DIRCACHE_INVAL	3
943 #define	CFS_DIRCACHE_ENABLE	(CFS_DIRCACHE_INVAL * CFS_DIRCACHE_COST) /* 9 */
944 
945 /*
946  * Conversion macros (VTOC reads v_data; VFS_TO_FSCACHE reads vfs_data).
947  */
948 #define	VTOC(VP)		((struct cnode *)((void *)((VP)->v_data)))
949 #define	CTOV(CP)		((CP)->c_vnode)	/* cnode -> its vnode */
950 #define	VFS_TO_FSCACHE(VFSP)	((struct fscache *)((void *)((VFSP)->vfs_data)))
951 #define	C_TO_FSCACHE(CP)	(VFS_TO_FSCACHE(CTOV(CP)->v_vfsp)) /* via v_vfsp */
952 
953 /*
954  * Various flags stored in the c_flags field of the cnode structure.
955  */
956 #define	CN_NOCACHE	0x1		/* no-cache mode */
957 #define	CN_DESTROY	0x2		/* destroy when inactive */
958 #define	CN_ROOT		0x4		/* root of the file system */
959 #define	CN_IDLE		0x8		/* file is idle */
960 #define	CN_NEEDOPEN	0x10		/* need to open backvp */
961 #define	CN_UPDATED	0x40		/* Metadata was updated - needs sync */
962 #define	CDIRTY		0x80		/* NOTE(review): undocumented; no CN_ prefix */
963 #define	CN_NEED_FRONT_SYNC	0x100	/* front file needs to be sync'd */
964 #define	CN_ALLOC_PENDING	0x200	/* Need to alloc attr cache entry */
965 #define	CN_STALE	0x400		/* cnode is stale */
966 #define	CN_MODIFIED	0x800		/* Object has been written to */
967 #define	CN_POPULATION_PENDING	0x1000	/* Population data needs to be sync'd */
968 #define	CN_ASYNC_POPULATE	0x2000	/* async population pending */
969 #define	CN_ASYNC_POP_WORKING	0x4000	/* async population in progress */
970 #define	CN_PENDRM	0x8000		/* hold off unlink until reconnected */
971 #define	CN_MAPWRITE	0x100000	/* mmapped file that is being written */
972 #define	CN_CMODINPROG	0x200000	/* writepage() in progress */
973 
974 /*
975  * io flags (in c_ioflags)
976  */
977 #define	CIO_PUTPAGES	0x1		/* putpage pending: off==0, len==0 */
978 
979 #define	CFS_MAX_THREADS		5
980 #define	CFS_ASYNC_TIMEOUT	(60 * hz)	/* NOTE(review): presumably 60s in ticks */
981 
982 enum cachefs_cmd {		/* type of cachefs_req.cfs_cmd */
983 	CFS_INVALID,		/* first enumerator, so its value is 0 */
984 	CFS_CACHE_SYNC,
985 	CFS_PUTPAGE,
986 	CFS_IDLE,
987 	CFS_POPULATE,
988 	CFS_NOOP
989 };
990 
991 struct cachefs_fs_sync_req {	/* cu_fs_sync member of cachefs_req.cfs_req_u */
992 	struct cachefscache *cf_cachep;
993 };
994 
995 struct cachefs_idle_req {	/* cu_idle member of cachefs_req.cfs_req_u */
996 	vnode_t *ci_vp;
997 };
998 
999 /*
1000  * Large File support: the offset in the vnode for a putpage request
1001  * can now be greater than 2GB, so cp_off is an offset_t.
1002  */
1003 
1004 struct cachefs_putpage_req {	/* cu_putpage member of cachefs_req.cfs_req_u */
1005 	vnode_t *cp_vp;
1006 	offset_t cp_off;
1007 	int cp_len;
1008 	int cp_flags;
1009 };
1010 
1011 /*
1012  * Large File support: the offset in the vnode for a populate request
1013  * can now be greater than 2GB, so cpop_off is an offset_t.
1014  */
1015 
1016 struct cachefs_populate_req {	/* cu_populate member of cachefs_req.cfs_req_u */
1017 	vnode_t *cpop_vp;
1018 	offset_t cpop_off;
1019 	size_t cpop_size;
1020 };
1021 
1022 struct cachefs_req {
1023 	struct cachefs_req	*cfs_next;	/* link to another cachefs_req */
1024 	enum cachefs_cmd	cfs_cmd;	/* Command to execute */
1025 	cred_t *cfs_cr;
1026 	union {		/* one member per request structure defined above */
1027 		struct cachefs_fs_sync_req cu_fs_sync;
1028 		struct cachefs_idle_req cu_idle;
1029 		struct cachefs_putpage_req cu_putpage;
1030 		struct cachefs_populate_req cu_populate;
1031 	} cfs_req_u;
1032 	kmutex_t cfs_req_lock;	/* Protects contents */
1033 };
1034 
1035 extern struct kmem_cache *cachefs_req_cache;	/* defined outside this header */
1036 
1037 /*
1038  * Large file support: We allow cachefs to understand the 64 bit inode type.
1039  */
1040 
1041 struct cachefs_fid {
1042 	ushort_t	cf_len;
1043 	ino64_t		cf_fileno;	/* 64 bit inode type (ino64_t) */
1044 	uint_t		cf_gen;
1045 };
1046 #define	CFS_FID_SIZE	(sizeof (struct cachefs_fid) - sizeof (ushort_t)) /* padding included */
1047 
1048 /*
1049  *
1050  * cachefs kstat stuff.  each time you mount a cachefs filesystem, it
1051  * gets a unique number.  it'll get that number again if you remount
1052  * the same thing.  the number is unique until reboot, but it doesn't
1053  * survive reboots.
1054  *
1055  * each cachefs kstat uses this per-filesystem identifier.  to get the
1056  * valid identifiers, the `cachefs.0.key' kstat has a mapping of all
1057  * the available filesystems.  its structure, cachefs_kstat_key, is
1058  * below.
1059  *
1060  */
1061 
1062 typedef struct cachefs_kstat_key {	/* structure of the `cachefs.0.key' kstat */
1063 	int ks_id;		/* NOTE(review): presumably the per-fs identifier */
1064 	int ks_mounted;
1065 	uint64_t ks_vfsp;	/* this and the fields below are plain uint64_t */
1066 	uint64_t ks_mountpoint;
1067 	uint64_t ks_backfs;
1068 	uint64_t ks_cachedir;
1069 	uint64_t ks_cacheid;
1070 } cachefs_kstat_key_t;
1071 extern cachefs_kstat_key_t *cachefs_kstat_key;
1072 extern int cachefs_kstat_key_n;	/* NOTE(review): presumably the key count */
1073 
1074 /*
1075  * cachefs debugging aid.  cachefs_debug_info_t is a cookie that we
1076  * can keep around to see what was happening at a certain time.
1077  *
1078  * for example, if we have a deadlock on the cnode's statelock
1079  * (i.e. someone is not letting go of it), we can add a
1080  * cachefs_debug_info_t * to the cnode structure, and call
1081  * cachefs_debug_save() whenever we grab the lock.  then, when we're
1082  * deadlocked, we can see what was going on when we grabbed the lock
1083  * in the first place, and (hopefully) why we didn't release it.
1084  */
1085 
1086 #define	CACHEFS_DEBUG_DEPTH		(16)	/* number of cdb_stack[] slots */
1087 typedef struct cachefs_debug_info {
1088 	char		*cdb_message;	/* arbitrary message */
1089 	uint_t		cdb_flags;	/* arbitrary flags */
1090 	int		cdb_int;	/* arbitrary int */
1091 	void		*cdb_pointer;	/* arbitrary pointer */
1092 	uint_t		cdb_count;	/* how many times called */
1093 
1094 	cachefscache_t	*cdb_cachep;	/* relevant cachep (maybe undefined) */
1095 	struct fscache	*cdb_fscp;	/* relevant fscache */
1096 	struct cnode	*cdb_cnode;	/* relevant cnode */
1097 	vnode_t		*cdb_frontvp;	/* relevant front vnode */
1098 	vnode_t		*cdb_backvp;	/* relevant back vnode */
1099 
1100 	kthread_id_t	cdb_thread;	/* thread who called */
1101 	hrtime_t	cdb_timestamp;	/* when */
1102 	int		cdb_depth;	/* depth of saved stack */
1103 	pc_t		cdb_stack[CACHEFS_DEBUG_DEPTH]; /* stack trace */
1104 	struct cachefs_debug_info *cdb_next; /* pointer to next record */
1105 } cachefs_debug_info_t;
1106 
1107 /*
1108  * cachefs function prototypes (only visible when _KERNEL is defined)
1109  */
1110 #if defined(_KERNEL)
1111 extern int cachefs_getcookie(vnode_t *, struct fid *, struct vattr *,
1112 		cred_t *, uint32_t);
1113 cachefscache_t *cachefs_cache_create(void);	/* takes no arguments */
1114 void cachefs_cache_destroy(cachefscache_t *cachep);
1115 int cachefs_cache_activate_ro(cachefscache_t *cachep, vnode_t *cdvp);
1116 void cachefs_cache_activate_rw(cachefscache_t *cachep);
1117 void cachefs_cache_dirty(struct cachefscache *cachep, int lockit);
1118 int cachefs_cache_rssync(struct cachefscache *cachep);
1119 void cachefs_cache_sync(struct cachefscache *cachep);
1120 uint_t cachefs_cache_unique(cachefscache_t *cachep);
1121 void cachefs_do_req(struct cachefs_req *);
1122 
1123 /* cachefs_cnode.c (every declaration below is a full prototype) */
1124 void cachefs_cnode_idle(struct vnode *vp, cred_t *cr);
1125 void cachefs_cnode_idleclean(fscache_t *fscp, int unmount);
1126 int cachefs_cnode_inactive(struct vnode *vp, cred_t *cr);
1127 void cachefs_cnode_listadd(struct cnode *cp);
1128 void cachefs_cnode_listrem(struct cnode *cp);
1129 void cachefs_cnode_free(struct cnode *cp);
1130 void cachefs_cnode_cleanfreelist(void);	/* (void): calls with arguments are rejected */
1131 void cachefs_cnode_idleadd(struct cnode *cp);
1132 void cachefs_cnode_idlerem(struct cnode *cp);
1133 int cachefs_cnode_find(filegrp_t *fgp, cfs_cid_t *cidp, fid_t *cookiep,
1134     struct cnode **cpp, struct vnode *vp, vattr_t *vap);
1135 int cachefs_cnode_make(cfs_cid_t *cidp, fscache_t *fscp, fid_t *cookiep,
1136     vattr_t *vap, vnode_t *backvp, cred_t *cr, int flag, cnode_t **cpp);
1137 int cachefs_cid_inuse(filegrp_t *fgp, cfs_cid_t *cidp);
1138 int cachefs_fileno_inuse(fscache_t *fscp, ino64_t fileno);
1139 int cachefs_cnode_create(fscache_t *fscp, vattr_t *vap, int flag,
1140     cnode_t **cpp);
1141 void cachefs_cnode_move(cnode_t *cp);
1142 int cachefs_cnode_lostfound(cnode_t *cp, char *rname);
1143 void cachefs_cnode_sync(cnode_t *cp);
1144 void cachefs_cnode_traverse(fscache_t *fscp, void (*routinep)(cnode_t *)); /* callback */
1145 void cachefs_cnode_stale(cnode_t *cp);
1146 void cachefs_cnode_setlocalstats(cnode_t *cp);
1147 void cachefs_cnode_disable_caching(cnode_t *cp);
1148 
1149 void cachefs_enable_caching(struct fscache *);
1150 
1151 /* cachefs_fscache.c */
1152 void fscache_destroy(fscache_t *);	/* note: name has no cachefs_ prefix */
1153 
1154 /* cachefs_ioctl.h -- NOTE(review): the other sections name .c files; confirm */
1155 int cachefs_pack_common(vnode_t *vp, cred_t *cr);
1156 void cachefs_inum_register(fscache_t *fscp, ino64_t real, ino64_t fake);
1157 ino64_t cachefs_inum_real2fake(fscache_t *fscp, ino64_t real);
1158 
1159 
1160 /* cachefs_subr.c */
1161 int cachefs_sync_metadata(cnode_t *);
1162 int cachefs_cnode_cnt(int);
1163 int cachefs_getbackvp(struct fscache *, struct cnode *);
1164 int cachefs_getfrontfile(cnode_t *);
1165 void cachefs_removefrontfile(cachefs_metadata_t *mdp, cfs_cid_t *cidp,
1166     filegrp_t *fgp);
1167 void cachefs_nocache(cnode_t *);
1168 void cachefs_inval_object(cnode_t *);
1169 void make_ascii_name(cfs_cid_t *cidp, char *strp);
1170 int cachefs_async_halt(struct cachefs_workq *, int);
1171 int cachefs_async_okay(void);
1172 int cachefs_check_allocmap(cnode_t *cp, u_offset_t off);
1173 void cachefs_update_allocmap(cnode_t *, u_offset_t, size_t);
1174 int cachefs_cachesymlink(struct cnode *cp, cred_t *cr);
1175 int cachefs_stuffsymlink(cnode_t *cp, caddr_t buf, int buflen);
1176 int cachefs_readlink_back(cnode_t *cp, cred_t *cr, caddr_t *bufp, int *buflenp);
1177 /* Commented-out variant with the cnode first; the live prototype follows.
1178  * void cachefs_cluster_allocmap(struct cnode *, u_offset_t, u_offset_t *,
1179  *	size_t *, size_t);
1180  */
1181 void cachefs_cluster_allocmap(u_offset_t, u_offset_t *, size_t *, size_t,
1182 		struct cnode *);	/* cnode is the last argument here */
1183 int cachefs_populate(cnode_t *, u_offset_t, size_t, vnode_t *, vnode_t *,
1184 	u_offset_t, cred_t *);
1185 int cachefs_stats_kstat_snapshot(kstat_t *, void *, int);
1186 cachefs_debug_info_t *cachefs_debug_save(cachefs_debug_info_t *, int,
1187     char *, uint_t, int, void *, cachefscache_t *, struct fscache *,
1188     struct cnode *);
1189 void cachefs_debug_show(cachefs_debug_info_t *);
1190 uint32_t cachefs_cred_checksum(cred_t *cr);
1191 int cachefs_frontfile_size(cnode_t *cp, u_offset_t length);
1192 int cachefs_req_create(void *, void *, int);
1193 void cachefs_req_destroy(void *, void *);	/* NOTE(review): these two look like kmem cache callbacks -- confirm */
1194 int cachefs_stop_cache(cnode_t *);
1195 
1196 
1197 /* cachefs_resource.c */
1198 void cachefs_rlent_moveto_nolock(cachefscache_t *cachep,
1199     enum cachefs_rl_type type, uint_t entno, size_t);
1200 void cachefs_rlent_moveto(cachefscache_t *, enum cachefs_rl_type, uint_t,
1201     size_t);	/* same parameter types as the _nolock variant above */
1202 void cachefs_rlent_verify(cachefscache_t *, enum cachefs_rl_type, uint_t);
1203 void cachefs_rl_changefileno(cachefscache_t *cachep, uint_t entno,
1204 	ino64_t fileno);
1205 int cachefs_rlent_data(cachefscache_t *cachep, rl_entry_t *valp,
1206     uint_t *entnop);
1207 void cachefs_move_modified_to_mf(cachefscache_t *cachep, fscache_t *fscp);
1208 int cachefs_allocblocks(cachefscache_t *, size_t, enum cachefs_rl_type);
1209 void cachefs_freeblocks(cachefscache_t *, size_t, enum cachefs_rl_type);
1210 void cachefs_freefile(cachefscache_t *);
1211 int cachefs_allocfile(cachefscache_t *);
1212 int cachefs_rl_alloc(struct cachefscache *cachep, rl_entry_t *valp,
1213     uint_t *entnop);
1214 int cachefs_rl_attrc(struct cachefscache *, int, int);
1215 void cachefs_cachep_worker_thread(cachefscache_t *);
1216 void cachefs_rl_cleanup(cachefscache_t *);
1217 int cachefs_rl_entry_get(cachefscache_t *, uint_t, rl_entry_t **);
1218 #ifdef CFSRLDEBUG	/* rl_entry_t debug routines, declared only with CFSRLDEBUG */
1219 void cachefs_rl_debug_save(rl_entry_t *);
1220 void cachefs_rl_debug_show(rl_entry_t *);
1221 void cachefs_rl_debug_destroy(rl_entry_t *);
1222 #endif /* CFSRLDEBUG */
1223 
1224 /* cachefs_log.c */
1225 int cachefs_log_kstat_snapshot(kstat_t *, void *, int);
1226 void cachefs_log_process_queue(cachefscache_t *, int);
1227 int cachefs_log_logfile_open(cachefscache_t *, char *);
1228 struct cachefs_log_cookie
1229 	*cachefs_log_create_cookie(struct cachefs_log_control *);
1230 void cachefs_log_error(cachefscache_t *, int, int);
1231 void cachefs_log_destroy_cookie(struct cachefs_log_cookie *);
1232 /* Everything below takes (cachefscache_t *, int, struct vfs *, ...) first. */
1233 void cachefs_log_mount(cachefscache_t *, int, struct vfs *,
1234     fscache_t *, char *, enum uio_seg, char *);
1235 void cachefs_log_umount(cachefscache_t *, int, struct vfs *);
1236 void cachefs_log_getpage(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1237     uid_t, u_offset_t, size_t);
1238 void cachefs_log_readdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1239     uid_t, u_offset_t, int);
1240 void cachefs_log_readlink(cachefscache_t *, int, struct vfs *,
1241     fid_t *, ino64_t, uid_t, size_t);
1242 void cachefs_log_remove(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1243     uid_t);
1244 void cachefs_log_rmdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1245     uid_t);
1246 void cachefs_log_truncate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1247     uid_t, u_offset_t);
1248 void cachefs_log_putpage(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1249     uid_t, u_offset_t, size_t);
1250 void cachefs_log_create(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1251     uid_t);
1252 void cachefs_log_mkdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1253     uid_t);
1254 void cachefs_log_rename(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1255     int, uid_t);
1256 void cachefs_log_symlink(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1257     uid_t, int);
1258 void cachefs_log_populate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1259     u_offset_t, size_t);
1260 void cachefs_log_csymlink(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1261     int);
1262 void cachefs_log_filldir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1263     u_offset_t);
1264 void cachefs_log_mdcreate(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1265     uint_t);
1266 void cachefs_log_gpfront(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1267     uid_t, u_offset_t, uint_t);
1268 void cachefs_log_rfdir(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1269     uid_t);
1270 void cachefs_log_ualloc(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1271     u_offset_t, size_t);
1272 void cachefs_log_calloc(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t,
1273     u_offset_t, size_t);
1274 void cachefs_log_nocache(cachefscache_t *, int, struct vfs *, fid_t *, ino64_t);
1275 
1276 /* cachefs_vnops.c */
1277 struct vnodeops *cachefs_getvnodeops(void);
1278 int cachefs_lookup_common(vnode_t *dvp, char *nm, vnode_t **vpp,
1279     struct pathname *pnp, int flags, vnode_t *rdir, cred_t *cr);
1280 int cachefs_putpage_common(struct vnode *vp, offset_t off,
1281     size_t len, int flags, cred_t *cr);
1282 ino64_t cachefs_fileno_conflict(fscache_t *fscp, ino64_t old);
1283 int cachefs_remove_connected(vnode_t *dvp, char *nm, cred_t *cr,
1284     vnode_t *vp);
1285 int cachefs_remove_disconnected(vnode_t *dvp, char *nm, cred_t *cr,
1286     vnode_t *vp);	/* same parameters as cachefs_remove_connected() */
1287 int cachefs_cacheacl(cnode_t *, vsecattr_t *);
1288 void cachefs_purgeacl(cnode_t *);
1289 int cachefs_vtype_aclok(vnode_t *);
1290 
1291 /* cachefs_vfsops.c */
1292 int cachefs_init_vfsops(int);
1293 int cachefs_init_vnops(char *);
1294 void cachefs_kstat_mount(struct fscache *, char *, char *, char *, char *);
1295 void cachefs_kstat_umount(int);
1296 int cachefs_kstat_key_update(kstat_t *, int);
1297 int cachefs_kstat_key_snapshot(kstat_t *, void *, int);
1298 
1299 extern void cachefs_workq_init(struct cachefs_workq *);
1300 extern void cachefs_addqueue(struct cachefs_req *, struct cachefs_workq *);
1301 
1302 
1303 extern void *cachefs_kmem_alloc(size_t, int);
1304 extern void *cachefs_kmem_zalloc(size_t, int);
1305 extern void cachefs_kmem_free(void *, size_t);	/* pointer, then a size_t */
1306 extern char *cachefs_strdup(char *);
1307 
1308 #endif /* defined (_KERNEL) */
1309 
1310 
1311 
1312 #define	C_RL_MAXENTS	0x4000		/* Whatever (0x4000 == 16384) */
1313 
1314 /*
1315  * ioctls: all built with _IO('f', n), using n = 78 and 81 through 86.
1316  */
1317 #include <sys/ioccom.h>
1318 #define	_FIOCOD		_IO('f', 78)		/* consistency on demand */
1319 #define	_FIOSTOPCACHE	_IO('f', 86)		/* stop using cache */
1320 
1321 #define	CACHEFSIO_PACK		_IO('f', 81)
1322 #define	CACHEFSIO_UNPACK	_IO('f', 82)
1323 #define	CACHEFSIO_UNPACKALL	_IO('f', 83)
1324 #define	CACHEFSIO_PACKINFO	_IO('f', 84)
1325 #define	CACHEFSIO_DCMD		_IO('f', 85)
1326 
1327 #ifdef __cplusplus
1328 }
1329 #endif
1330 
1331 #endif /* _SYS_FS_CACHEFS_FS_H */
1332