xref: /titanic_44/usr/src/uts/common/sys/fs/ufs_inode.h (revision 13bb89069ebe3cbce237b2708bda9946a2ff4607)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #ifndef	_SYS_FS_UFS_INODE_H
41 #define	_SYS_FS_UFS_INODE_H
42 
43 #pragma ident	"%Z%%M%	%I%	%E% SMI"
44 
45 #include <sys/isa_defs.h>
46 #include <sys/fbuf.h>
47 #include <sys/fdbuffer.h>
48 #include <sys/fcntl.h>
49 #include <sys/uio.h>
50 #include <sys/t_lock.h>
51 #include <sys/thread.h>
52 #include <sys/cred.h>
53 #include <sys/time.h>
54 #include <sys/types32.h>
55 #include <sys/fs/ufs_fs.h>
56 #include <sys/fs/ufs_lockfs.h>
57 #include <sys/fs/ufs_trans.h>
58 #include <sys/kstat.h>
59 #include <sys/fs/ufs_acl.h>
60 #include <sys/fs/ufs_panic.h>
61 #include <sys/dnlc.h>
62 
63 #ifdef	__cplusplus
64 extern "C" {
65 #endif
66 
67 /*
68  * The I node is the focus of all local file activity in UNIX.
69  * There is a unique inode allocated for each active file,
70  * each current directory, each mounted-on file, each mapping,
71  * and the root.  An inode is `named' by its dev/inumber pair.
72  * Data in icommon is read in from permanent inode on volume.
73  *
74  * The following locks are associated with each inode:
75  *	i_rwlock:	Serializes ufs_write and ufs_setattr request
76  *			and allows ufs_read requests to proceed in parallel.
77  *			Serializes reads/updates to directories.
78  *	vfs_dqrwlock:	Manages quota sub-system quiescence.  See below.
79  *	i_contents:	Protects almost all of the fields in the inode
80  *			except for those listed below. When held
81  *			in writer mode also protects those fields
82  *			listed under i_tlock.
83  *	i_tlock:	When i_tlock is held with the i_contents reader
84  *			lock the i_atime, i_mtime, i_ctime,
85  *			i_delayoff, i_delaylen, i_nextrio, i_writes, i_flag
86  *			i_seq, i_writer & i_mapcnt fields are protected.
87  *			For more i_flag locking info see below.
88  *	ih_lock:	Protects inode hash chain buckets
89  *	ifree_lock:	Protects inode freelist
90  *
91  * Lock ordering:
92  *	i_rwlock > i_contents > i_tlock
93  *	i_rwlock > vfs_dqrwlock > i_contents(writer) > i_tlock
94  *	i_contents > i_tlock
95  *	vfs_dqrwlock > i_contents(writer) > i_tlock
96  *	ih_lock > i_contents > i_tlock
97  *
98  * Making major changes to quota sub-system state while the file
99  * system is mounted required the addition of another lock.  The
100  * primary lock in the quota sub-system is vfs_dqrwlock in the ufsvfs
101  * structure.  This lock is used to manage quota sub-system quiescence
102  * for a particular file system. Major changes to quota sub-system
103  * state (disabling quotas, enabling quotas, and setting new quota
104  * limits) all require the file system to be quiescent and grabbing
105  * vfs_dqrwlock as writer accomplishes this.  On the other hand,
106  * grabbing vfs_dqrwlock as reader makes the quota sub-system
107  * non-quiescent and lets the quota sub-system know that now is not a
108  * good time to change major quota sub-system state.  Typically
109  * vfs_dqrwlock is grabbed for reading before i_contents is grabbed for
110  * writing.  However, there are cases where vfs_dqrwlock is grabbed for
111  * reading without a corresponding i_contents write grab because there
112  * is no relevant inode.  There are also cases where i_contents is
113  * grabbed for writing when a vfs_dqrwlock read grab is not needed
114  * because the inode changes do not affect quotas.
115  *
116  * Unfortunately, performance considerations have required that we be more
117  * intelligent about using i_tlock when updating i_flag.  Ideally, we would
118  * have simply separated out several of the bits in i_flag into their own
119  * ints to avoid problems.  But, instead, we have implemented the following
120  * rules:
121  *
122  *	o You can update any i_flag field while holding the writer-contents,
123  *	  or by holding the reader-contents AND holding i_tlock.
124  *	  You can only call ITIMES_NOLOCK while holding the writer-contents,
125  *	  or by holding the reader-contents AND holding i_tlock.
126  *
127  *	o For a directory, holding the reader-rw_lock is sufficient for setting
128  *	  IACC.
129  *
130  *	o Races with IREF are avoided by holding the reader contents lock
131  *	  and by holding i_tlock in ufs_rmidle, ufs_putapage, and ufs_getpage.
132  *	  And by holding the writer-contents in ufs_iinactive.
133  *
134  *	o The callers are no longer required to handle the calls to ITIMES
135  *	  and ITIMES_NOLOCK.  The functions that set the i_flag bits are
136  *	  responsible for managing those calls.  The exceptions are the
137  *	  bmap routines.
138  *
139  * SVR4 Extended Fundamental Type (EFT) support:
140  * 	The inode structure has been enhanced to support
141  *	32-bit user-id, 32-bit group-id, and 32-bit device number.
142  *	Standard SVR4 ufs also supports 32-bit mode field.  For the reason
143  *	of backward compatibility with the previous ufs disk format,
144  *	32-bit mode field is not supported.
145  *
146  *	The current inode structure is 100% backward compatible with
147  *	the previous inode structure if no user-id or group-id exceeds
148  *	USHRT_MAX, and no major or minor number of a device number
149  *	stored in an inode exceeds 255.
150  *
151  * Rules for managing i_seq:
152  *	o i_seq is locked under the same rules as i_flag
153  *	o The i_ctime or i_mtime MUST never change without increasing
154  *	  the value of i_seq.
155  *	o You may increase the value of i_seq without the timestamps
156  *	  changing, this may decrease the caller's performance but will
157  *	  be functionally correct.
158  *	o The common case is when IUPD or ICHG is set, increase i_seq
159  *	  and immediately call ITIMES* or ufs_iupdat to create a new timestamp.
160  *	o A less common case is the setting of IUPD or ICHG and while still
161  *	  holding the correct lock defer the timestamp and i_seq update
162  *	  until later, but it must still be done before the lock is released.
163  *	  bmap_write is an example of this, where the caller does the update.
164  *	o If multiple changes are being made with the timestamps being
165  *	  updated only at the end, a single increase of i_seq is allowed.
166  *	o If changes are made with IUPD or ICHG being set, but
167  *	  the controlling lock is being dropped before the timestamp is
168  *	  updated, there is a risk that another thread will also change
169  *	  the file, update i_flag, and push just one timestamp update.
170  *	  There is also the risk that another thread calls ITIMES or
171  *	  ufs_iupdat without setting IUPD|ICHG and thus not changing i_seq,
172  *	  this will cause ufs_imark to change the timestamps without changing
173  *	  i_seq. If the controlling lock is dropped, ISEQ must be set to
174  *	  force i_seq to be increased on next ufs_imark, but i_seq MUST still
175  *	  be increased by the original setting thread before its deferred
176  *	  call to ITIMES to ensure it is increased the correct number of times.
177  */
178 
/*
 * Flag values indicating that an id is 32 bits long (see the EFT notes
 * in this file).  Presumably these are what the 16-bit ic_suid/ic_sgid
 * fields hold when the real id lives in ic_uid/ic_gid -- confirm against
 * the inode read/update code.
 *
 * Each expansion is fully parenthesized so it behaves as a single operand
 * (e.g. under sizeof), and GID_LONG is cast to the gid type to match the
 * o_gid_t ic_sgid field.
 */
#define	UID_LONG  ((o_uid_t)65535)
				/* flag value to indicate uid is 32-bit long */
#define	GID_LONG  ((o_gid_t)65535)
				/* flag value to indicate gid is 32-bit long */
183 
#define	NDADDR	12		/* direct addresses in inode */
#define	NIADDR	3		/* indirect addresses in inode */
/*
 * Number of bytes of block-address space available for a fast symbolic
 * link: all direct and indirect address slots except one.  The whole
 * replacement list is parenthesized so that expressions such as
 * `len / FSL_SIZE' or `FSL_SIZE * n' evaluate as intended.
 */
#define	FSL_SIZE ((NDADDR + NIADDR - 1) * sizeof (daddr32_t))
				/* max fast symbolic name length is 56 */
188 
/*
 * Shorthand member paths usable on a struct inode: i_fs reaches b_un.b_fs
 * of the superblock buffer (vfs_bufp) of the inode's ufsvfs, and i_vfs is
 * the v_vfsp of the inode's vnode.
 */
189 #define	i_fs	i_ufsvfs->vfs_bufp->b_un.b_fs
190 #define	i_vfs	i_vnode->v_vfsp
191 
/*
 * Inode fields common to the in-core inode (embedded as i_ic in struct
 * inode) and the on-disk inode (di_icom in struct dinode).  The number
 * that leads each field comment is that field's byte offset.  The three
 * time stamps are declared differently for _KERNEL and non-_KERNEL
 * consumers; the offsets in the comments are the same in both variants.
 */
192 struct 	icommon {
193 	o_mode_t ic_smode;	/*  0: mode and type of file */
194 	short	ic_nlink;	/*  2: number of links to file */
195 	o_uid_t	ic_suid;	/*  4: owner's user id */
196 	o_gid_t	ic_sgid;	/*  6: owner's group id */
197 	u_offset_t ic_lsize;	/*  8: number of bytes in file */
198 #ifdef _KERNEL
199 	struct timeval32 ic_atime;	/* 16: time last accessed */
200 	struct timeval32 ic_mtime;	/* 24: time last modified */
201 	struct timeval32 ic_ctime;	/* 32: last time inode changed */
202 #else
203 	time32_t ic_atime;	/* 16: time last accessed */
204 	int32_t	ic_atspare;
205 	time32_t ic_mtime;	/* 24: time last modified */
206 	int32_t	ic_mtspare;
207 	time32_t ic_ctime;	/* 32: last time inode changed */
208 	int32_t	ic_ctspare;
209 #endif
210 	daddr32_t	ic_db[NDADDR];	/* 40: disk block addresses */
211 	daddr32_t	ic_ib[NIADDR];	/* 88: indirect blocks */
212 	int32_t	ic_flags;	/* 100: cflags */
213 	int32_t	ic_blocks;	/* 104: 512 byte blocks actually held */
214 	int32_t	ic_gen;		/* 108: generation number */
215 	int32_t	ic_shadow;	/* 112: shadow inode */
216 	uid_t	ic_uid;		/* 116: long EFT version of uid */
217 	gid_t	ic_gid;		/* 120: long EFT version of gid */
218 	uint32_t ic_oeftflag;	/* 124: extended attr directory ino, 0 = none */
219 };
220 
221 /*
222  * Large Files: Note we use the inline functions load_double, store_double
223  * to load and store the long long values of i_size. Therefore the
224  * address of i_size must be eight byte aligned. Kmem_alloc of incore
225  * inode structure makes sure that the structure is 8-byte aligned.
226  * XX64 - reorder this structure?
227  */
/*
 * In-core inode.  The first four members (i_chain[2], i_freef, i_freeb)
 * have the same order and types as the members of iqhead_t, which is
 * declared later in this file as "must match inode_t".  Do not reorder
 * them or move i_ic.
 */
228 typedef struct inode {
229 	struct	inode *i_chain[2];	/* must be first */
230 	struct inode *i_freef;	/* free list forward - must be before i_ic */
231 	struct inode *i_freeb;	/* free list back - must be before i_ic */
232 	struct 	icommon	i_ic;	/* Must be here */
233 	struct	vnode *i_vnode;	/* vnode associated with this inode */
234 	struct	vnode *i_devvp;	/* vnode for block I/O */
235 	dev_t	i_dev;		/* device where inode resides */
236 	ino_t	i_number;	/* i number, 1-to-1 with device address */
237 	off_t	i_diroff;	/* offset in dir, where we found last entry */
238 				/* just a hint - no locking needed */
239 	struct ufsvfs *i_ufsvfs; /* incore fs associated with inode */
240 	struct	dquot *i_dquot;	/* quota structure controlling this file */
241 	krwlock_t i_rwlock;	/* serializes write/setattr requests */
242 	krwlock_t i_contents;	/* protects (most of) inode contents */
243 	kmutex_t i_tlock;	/* protects time fields, i_flag */
244 	offset_t i_nextr;	/*					*/
245 				/* next byte read offset (read-ahead)	*/
246 				/*   No lock required			*/
247 				/*					*/
248 	uint_t	i_flag;		/* inode flags */
249 	uint_t	i_seq;		/* modification sequence number */
250 	boolean_t i_cachedir;	/* Cache this directory on next lookup */
251 				/* - no locking needed  */
252 	long	i_mapcnt;	/* mappings to file pages */
253 	int	*i_map;		/* block list for the corresponding file */
254 	dev_t	i_rdev;		/* INCORE rdev from i_oldrdev by ufs_iget */
255 	size_t	i_delaylen;	/* delayed writes, units=bytes */
256 	offset_t i_delayoff;	/* where we started delaying */
257 	offset_t i_nextrio;	/* where to start the next clust */
258 	long	i_writes;	/* number of outstanding bytes in write q */
259 	kcondvar_t i_wrcv;	/* sleep/wakeup for write throttle */
260 	offset_t i_doff;	/* dinode byte offset in file system */
261 	si_t *i_ufs_acl;	/* pointer to acl entry */
262 	dcanchor_t i_danchor;	/* directory cache anchor */
263 	kthread_t *i_writer;	/* thread which is in window in wrip() */
264 } inode_t;
265 
/*
 * On-disk inode: struct icommon overlaid with a 128-byte character array,
 * so a dinode is at least 128 bytes.  Note that the union member named
 * di_size here is the char array; the di_size macro defined further down
 * in this file expands to di_ic.ic_lsize (the file size) instead.
 */
266 struct dinode {
267 	union {
268 		struct	icommon di_icom;
269 		char	di_size[128];
270 	} di_un;
271 };
272 
/*
 * Convenience names for struct inode: each i_xxx below expands to a member
 * of the embedded icommon (i_ic), or to a slot of i_chain for i_forw/i_back.
 * Note that i_mode and i_smode both name ic_smode.
 */
273 #define	i_mode		i_ic.ic_smode
274 #define	i_nlink		i_ic.ic_nlink
275 #define	i_uid		i_ic.ic_uid
276 #define	i_gid		i_ic.ic_gid
277 #define	i_smode		i_ic.ic_smode
278 #define	i_suid		i_ic.ic_suid
279 #define	i_sgid		i_ic.ic_sgid
280 
281 #define	i_size		i_ic.ic_lsize
282 #define	i_db		i_ic.ic_db
283 #define	i_ib		i_ic.ic_ib
284 
285 #define	i_atime		i_ic.ic_atime
286 #define	i_mtime		i_ic.ic_mtime
287 #define	i_ctime		i_ic.ic_ctime
288 
289 #define	i_shadow	i_ic.ic_shadow
290 #define	i_oeftflag	i_ic.ic_oeftflag
291 #define	i_blocks	i_ic.ic_blocks
292 #define	i_cflags	i_ic.ic_flags
293 #ifdef _LITTLE_ENDIAN
294 /*
295  * Originally done on x86, but carried on to all other little-endian
296  * architectures, which provides for file system compatibility.
297  */
298 #define	i_ordev		i_ic.ic_db[1]	/* USL SVR4 compatibility */
299 #else
300 #define	i_ordev		i_ic.ic_db[0]	/* was i_oldrdev */
301 #endif
302 #define	i_gen		i_ic.ic_gen
303 #define	i_forw		i_chain[0]
304 #define	i_back		i_chain[1]
305 
/*
 * Convenience names for struct dinode, parallel to the i_xxx names for
 * struct inode: every di_xxx goes through di_ic, which in turn expands to
 * di_un.di_icom.  di_oeftflag is defined before di_ic; that is harmless
 * because macros are only expanded at the point of use.
 */
306 /* EFT transition aids - obsolete */
307 #define	oEFT_MAGIC	0x90909090
308 #define	di_oeftflag	di_ic.ic_oeftflag
309 
310 #define	di_ic		di_un.di_icom
311 #define	di_mode		di_ic.ic_smode
312 #define	di_nlink	di_ic.ic_nlink
313 #define	di_uid		di_ic.ic_uid
314 #define	di_gid		di_ic.ic_gid
315 #define	di_smode	di_ic.ic_smode
316 #define	di_suid		di_ic.ic_suid
317 #define	di_sgid		di_ic.ic_sgid
318 
319 #define	di_size		di_ic.ic_lsize
320 #define	di_db		di_ic.ic_db
321 #define	di_ib		di_ic.ic_ib
322 
323 #define	di_atime	di_ic.ic_atime
324 #define	di_mtime	di_ic.ic_mtime
325 #define	di_ctime	di_ic.ic_ctime
326 #define	di_cflags	di_ic.ic_flags
327 
328 #ifdef _LITTLE_ENDIAN
329 #define	di_ordev	di_ic.ic_db[1]
330 #else
331 #define	di_ordev	di_ic.ic_db[0]
332 #endif
333 #define	di_shadow	di_ic.ic_shadow
334 #define	di_blocks	di_ic.ic_blocks
335 #define	di_gen		di_ic.ic_gen
336 
/*
 * Bits for the in-core i_flag word (a uint_t in struct inode).  Each value
 * is a distinct single bit; the last two (IJUNKIQ, IQUIET) lie above the
 * low 16 bits.  See the locking rules for i_flag at the top of this file.
 */
337 /* flags */
338 #define	IUPD		0x0001		/* file has been modified */
339 #define	IACC		0x0002		/* inode access time to be updated */
340 #define	IMOD		0x0004		/* inode has been modified */
341 #define	ICHG		0x0008		/* inode has been changed */
342 #define	INOACC		0x0010		/* no access time update in getpage */
343 #define	IMODTIME	0x0020		/* mod time already set */
344 #define	IREF		0x0040		/* inode is being referenced */
345 #define	ISYNC		0x0080		/* do all allocation synchronously */
346 #define	IFASTSYMLNK	0x0100		/* fast symbolic link */
347 #define	IMODACC		0x0200		/* only access time changed; */
348 					/*   filesystem won't become active */
349 #define	IATTCHG		0x0400		/* only size/blocks have changed */
350 #define	IBDWRITE	0x0800		/* the inode has been scheduled for */
351 					/* write operation asynchronously */
352 #define	ISTALE		0x1000		/* inode couldn't be read from disk */
353 #define	IDEL		0x2000		/* inode is being deleted */
354 #define	IDIRECTIO	0x4000		/* attempt directio */
355 #define	ISEQ		0x8000		/* deferred i_seq increase */
356 #define	IJUNKIQ		0x10000		/* on junk idle queue */
357 #define	IQUIET		0x20000		/* No file system full messages */
358 
359 /* cflags (bits for ic_flags, reached through i_cflags / di_cflags) */
360 #define	IXATTR		0x0001		/* Extended attribute */
361 
/*
 * File type and permission bits, written in octal.  IFMT is the mask that
 * covers all of the IFxxx file type values below it.
 */
362 /* modes */
363 #define	IFMT		0170000		/* type of file */
364 #define	IFIFO		0010000		/* named pipe (fifo) */
365 #define	IFCHR		0020000		/* character special */
366 #define	IFDIR		0040000		/* directory */
367 #define	IFBLK		0060000		/* block special */
368 #define	IFREG		0100000		/* regular */
369 #define	IFLNK		0120000		/* symbolic link */
370 #define	IFSHAD		0130000		/* shadow inode */
371 #define	IFSOCK		0140000		/* socket */
372 #define	IFATTRDIR	0160000		/* Attribute directory */
373 
374 #define	ISUID		04000		/* set user id on execution */
375 #define	ISGID		02000		/* set group id on execution */
376 #define	ISVTX		01000		/* save swapped text even after use */
377 #define	IREAD		0400		/* read, write, execute permissions */
378 #define	IWRITE		0200
379 #define	IEXEC		0100
380 
/*
 * Two independent sets of constants follow: the I_SYNC/I_DSYNC/I_ASYNC
 * values are small integers (not a bit mask), while the I_FREE..I_ACCT
 * values are single-bit flags that may be combined.
 */
381 /* specify how the inode info is written in ufs_syncip() */
382 #define	I_SYNC		1		/* wait for the inode written to disk */
383 #define	I_DSYNC		2		/* wait for the inode written to disk */
384 					/* only if IATTCHG is set */
385 #define	I_ASYNC		0		/* don't wait for the inode written */
386 
387 /* flags passed to ufs_itrunc(), indirtrunc(), and free() */
388 #define	I_FREE	0x00000001		/* inode is being freed */
389 #define	I_DIR	0x00000002		/* inode is a directory */
390 #define	I_IBLK	0x00000004		/* indirect block */
391 #define	I_CHEAP	0x00000008		/* cheap free */
392 #define	I_SHAD	0x00000010		/* inode is a shadow inode */
393 #define	I_QUOTA	0x00000020		/* quota file */
394 #define	I_NOCANCEL	0x40		/* Don't cancel these fragments */
395 #define	I_ACCT	0x00000080		/* Update ufsvfs' unreclaimed_blocks */
396 /*
397  * Statistics on inodes
398  * Not protected by locks
399  * (every member is a named kstat counter)
400  */
400 struct instats {
401 	kstat_named_t in_size;		/* current cache size */
402 	kstat_named_t in_maxsize;	/* maximum cache size */
403 	kstat_named_t in_hits;		/* cache hits */
404 	kstat_named_t in_misses;	/* cache misses */
405 	kstat_named_t in_malloc;	/* kmem_alloc'ed */
406 	kstat_named_t in_mfree;		/* kmem_free'd */
407 	kstat_named_t in_maxreached;	/* Largest size reached by cache */
408 	kstat_named_t in_frfront;	/* # put at front of freelist */
409 	kstat_named_t in_frback;	/* # put at back of freelist */
410 	kstat_named_t in_qfree;		/* q's to delete thread */
411 	kstat_named_t in_scan;		/* # inodes scanned */
412 	kstat_named_t in_tidles;	/* # inodes idled by idle thread */
413 	kstat_named_t in_lidles;	/* # inodes idled by ufs_lookup */
414 	kstat_named_t in_vidles;	/* # inodes idled by ufs_vget */
415 	kstat_named_t in_kcalloc;	/* # inodes kmem_cache_alloced */
416 	kstat_named_t in_kcfree;	/* # inodes kmem_cache_freed */
417 	kstat_named_t in_poc;		/* # push-on-close's */
418 };
419 
420 #ifdef _KERNEL
421 
422 /*
423  * Extended attributes
424  */
425 
426 #define	XATTR_DIR_NAME	"/@/"
427 extern int	ufs_ninode;		/* high-water mark for inode cache */
428 
429 extern struct vnodeops *ufs_vnodeops;	/* vnode operations for ufs */
/* presumably the table ufs_vnodeops is built from -- confirm at its user */
430 extern const struct fs_operation_def ufs_vnodeops_template[];
431 
432 /*
433  * Convert between inode pointers and vnode pointers
434  * (VTOI reads the vnode's v_data, ITOV reads the inode's i_vnode;
435  * both evaluate their argument exactly once)
436  */
435 #define	VTOI(VP)	((struct inode *)(VP)->v_data)
436 #define	ITOV(IP)	((struct vnode *)(IP)->i_vnode)
437 
438 /*
439  * convert to fs
440  * (relies on the i_fs shorthand macro defined earlier in this file)
441  */
441 #define	ITOF(IP)	((struct fs *)(IP)->i_fs)
442 
443 /*
444  * Convert between vnode types and inode formats
445  */
446 extern enum vtype	iftovt_tab[];
447 
/* compiled out: `notneeded' is not defined in the visible part of this file */
448 #ifdef notneeded
449 
450 /* Look at sys/mode.h and os/vnode.c */
451 
452 extern int		vttoif_tab[];
453 
454 #endif
455 
456 /*
457  * Mark an inode with the current (unique) timestamp.
458  * (Note that UFS's concept of time only keeps 32 bits of seconds
459  * in the on-disk format).
460  */
/*
 * NOTE(review): iuniqtime is declared here without `extern', so every
 * translation unit that includes this header under _KERNEL emits a
 * tentative definition of it.  That only links when the toolchain merges
 * common symbols; consider `extern' here plus one definition in a .c file
 * (check where, if anywhere, it is currently defined before changing).
 */
461 struct timeval32 iuniqtime;
462 extern kmutex_t ufs_iuniqtime_lock;
463 
/*
 * ITIMES_NOLOCK(ip) simply calls ufs_itimes_nolock(ip); ITIMES(ip) wraps
 * the same call in mutex_enter/mutex_exit of ip->i_tlock.  See the i_flag
 * locking rules at the top of this file for when each may be used.
 *
 * NOTE(review): ITIMES expands to a bare { } block and evaluates `ip'
 * three times.  Written as `ITIMES(ip);' in an unbraced if/else it will
 * not compile, and the argument must be free of side effects.
 */
464 #define	ITIMES_NOLOCK(ip) ufs_itimes_nolock(ip)
465 
466 #define	ITIMES(ip) { \
467 	mutex_enter(&(ip)->i_tlock); \
468 	ITIMES_NOLOCK(ip); \
469 	mutex_exit(&(ip)->i_tlock); \
470 }
471 
472 /*
473  * The following interfaces are used to do atomic loads and stores
474  * of an inode's i_size, which is a long long data type.
475  *
476  * For LP64, we just do a load or a store - atomicity and alignment
477  * are 8-byte guaranteed.  For x86 there are no such instructions,
478  * so we grab i_contents as reader to get the size; we already hold
479  * it as writer when we're setting the size.
480  */
/*
 * Both variants take (resultp/value, ip) in that order.  The non-LP64
 * forms expand to a bare { } block, and UFS_SET_ISIZE there only ASSERTs
 * that i_contents is write-held; it does not take the lock itself.
 */
481 
482 #ifdef _LP64
483 
484 #define	UFS_GET_ISIZE(resultp, ip)	*(resultp) = (ip)->i_size
485 #define	UFS_SET_ISIZE(value, ip)	(ip)->i_size = (value)
486 
487 #else	/* _LP64 */
488 
489 #define	UFS_GET_ISIZE(resultp, ip)				\
490 	{							\
491 		rw_enter(&(ip)->i_contents, RW_READER);		\
492 		*(resultp) = (ip)->i_size;			\
493 		rw_exit(&(ip)->i_contents);			\
494 	}
495 #define	UFS_SET_ISIZE(value, ip)				\
496 	{							\
497 		ASSERT(RW_WRITE_HELD(&(ip)->i_contents));	\
498 		(ip)->i_size = (value);				\
499 	}
500 
501 #endif	/* _LP64 */
502 
/*
 * Allocate the specified block in the inode
 * and make sure any in-core pages are initialized.
 *
 * Expands to a bmap_write() call with the fourth argument fixed at 0.
 * Every macro argument is parenthesized so that arbitrary expressions
 * can be passed safely.
 */
#define	BMAPALLOC(ip, off, size, cr) \
	bmap_write((ip), (u_offset_t)(off), (size), 0, (cr))

#define	ESAME	(-1)		/* trying to rename linked files (special) */

/*
 * Value used when no block is allocated.  The cast expression is
 * parenthesized so the macro behaves as a single operand.
 */
#define	UFS_HOLE	((daddr32_t)-1)
513 
514 /*
515  * enums
516  *
517  * enum de_op appears in the ufs_direnter_cm(), ufs_direnter_lr() and
518  * ufs_dirmakeinode() prototypes in this file; enum dr_op appears in
519  * ufs_dirremove().
520  */
517 
518 /* direnter ops */
519 enum de_op { DE_CREATE, DE_MKDIR, DE_LINK, DE_RENAME, DE_SYMLINK, DE_ATTRDIR};
520 
521 /* dirremove ops */
522 enum dr_op { DR_REMOVE, DR_RMDIR, DR_RENAME };
523 
524 /*
525  * This overlays the fid structure (see vfs.h)
526  *
527  * LP64 note: we use int32_t instead of ino_t since UFS does not use
528  * inode numbers larger than 32-bits and ufid's are passed to NFS
529  * which expects them to not grow in size beyond 10 bytes (12 including
530  * the length).
531  *
532  * The members below add up to exactly that: 2 (ufid_len) + 2 + 4 + 4.
533  */
532 struct ufid {
533 	ushort_t ufid_len;
534 	ushort_t ufid_flags;
535 	int32_t	ufid_ino;
536 	int32_t	ufid_gen;
537 };
538 
539 /*
540  * each ufs thread (see ufs_thread.c) is managed by this struct
541  *
542  * The queue head is a union so the same structure can head a list of
543  * inodes (uq_ihead) or of ufs_failure_t records (uq_ufhead); uq_head is
544  * the untyped view of the same pointer.
545  */
542 struct ufs_q {
543 	union uq_head {
544 		void		*_uq_generic;	/* first entry on q */
545 		struct inode	*_uq_i;
546 		ufs_failure_t	*_uq_uf;
547 	} _uq_head;
548 	int		uq_ne;		/* # of entries/failures found */
549 	int		uq_lowat;	/* thread runs when ne == lowat */
550 	int		uq_hiwat;	/* synchronous idle if ne >= hiwat */
551 	ushort_t	uq_flags;	/* flags (see below) */
552 	kcondvar_t	uq_cv;		/* for sleep/wakeup */
553 	kthread_id_t	uq_threadp;	/* thread managing this q */
554 	kmutex_t	uq_mutex;	/* protects this struct */
555 };
556 
557 #define	uq_head		_uq_head._uq_generic
558 #define	uq_ihead	_uq_head._uq_i
559 #define	uq_ufhead	_uq_head._uq_uf
560 
561 /*
562  * uq_flags
563  */
564 #define	UQ_EXIT		(0x0001)	/* q server exits at its convenience */
565 #define	UQ_WAIT		(0x0002)	/* thread is waiting on q server */
566 #define	UQ_SUSPEND	(0x0004)	/* request for suspension */
567 #define	UQ_SUSPENDED	(0x0008)	/* thread has suspended itself */
568 #define	UQ_FASTCLIENTS	(0x0010)	/* fast clients in ufs_delq_info */
569 
570 /*
571  * When logging is enabled, statvfs must account for blocks and files that
572  * may be on the delete queue.  Protected by ufsvfsp->vfs_delete.uq_mutex
573  * (embedded in struct ufsvfs as vfs_delete_info).
574  */
574 struct ufs_delq_info {
575 	kcondvar_t	delq_fast_cv;	/* for fast-operating clients */
576 	u_offset_t	delq_unreclaimed_blocks;
577 	ulong_t		delq_unreclaimed_files;
578 };
579 
580 
581 /*
582  * global idle queues
583  * The queues are sized dynamically in proportion to ufs_ninode
584  * which, unless overridden, scales with the amount of memory.
585  * The idle queue is halved whenever it hits the low water mark
586  * (1/4 of ufs_ninode), but can burst to sizes much larger. The number
587  * of hash queues is currently maintained to give on average IQHASHQLEN
588  * entries when the idle queue is at the low water mark.
589  * Note, we do not need to search along the hash queues, but use them
590  * in order to batch together geographically local inodes to allow
591  * their updates (via the log or buffer cache) to require less disk seeks.
592  * This gives an incredible performance boost for logging and a boost for
593  * non logging file systems.
594  */
/*
 * Idle queue head.  Its members repeat, in order, the first four members
 * of inode_t so that a queue head can be linked into a list of inodes.
 */
595 typedef struct {
596 	inode_t *i_chain[2];	/* must match inode_t, but unused */
597 	inode_t *i_freef;	/* must match inode_t, idle list forward */
598 	inode_t *i_freeb;	/* must match inode_t, idle list back  */
599 } iqhead_t;
600 
601 extern struct ufs_q ufs_idle_q;		/* used by global ufs idle thread */
602 extern iqhead_t *ufs_junk_iq;		/* junk idle queues */
603 extern iqhead_t *ufs_useful_iq;		/* useful idle queues */
604 extern int ufs_njunk_iq;		/* number of entries in junk iq */
605 extern int ufs_nuseful_iq;		/* number of entries in useful iq */
606 extern int ufs_niqhash;			/* number of iq hash qs - power of 2 */
607 extern int ufs_iqhashmask;		/* iq hash mask = ufs_niqhash - 1 */
608 
/*
 * Idle queue hashing.
 *
 * IQHASH(ip) selects a queue from the inode number with the low
 * INOCGSHIFT bits dropped, masked by ufs_iqhashmask.
 * IQNEXT(i) is the index of the queue after i, wrapping via the mask.
 * The whole IQNEXT expansion is parenthesized so that it can be used
 * inside larger expressions (e.g. `IQNEXT(i) == start') without the
 * `&' being captured by a neighbouring operator.
 */
#define	IQHASHQLEN 32			/* see comments above */
#define	INOCGSHIFT 7			/* 128 inodes per cylinder group */
#define	IQHASH(ip) (((ip)->i_number >> INOCGSHIFT) & ufs_iqhashmask)
#define	IQNEXT(i) (((i) + 1) & ufs_iqhashmask) /* next idle queue */
613 
614 extern struct ufs_q	ufs_hlock;	/* used by global ufs hlock thread */
615 
616 /*
617  * vfs_lfflags flags
618  * (vfs_lfflags is a ushort_t member of struct ufsvfs, hence the cast)
619  */
619 #define	UFS_LARGEFILES	((ushort_t)0x1)	/* set if mount allows largefiles */
620 
621 /*
622  * vfs_dfritime flags
623  */
624 #define	UFS_DFRATIME	0x1		/* deferred access time */
625 
626 /*
627  * UFS VFS private data.
628  *
629  * UFS file system instances may be linked on several lists.
630  *
631  * -	The vfs_next field chains together every extant ufs instance; this
632  *	list is rooted at ufs_instances and should be used in preference to
633  *	the overall vfs list (which is properly the province of the generic
634  *	file system code, not of file system implementations).  This same list
635  *	link is used during forcible unmounts to chain together instances that
636  *	can't yet be completely dismantled.
637  *
638  * -	The vfs_wnext field is used within ufs_update to form a work list of
639  *	UFS instances to be synced out.
640  */
641 typedef struct ufsvfs {
642 	struct vfs	*vfs_vfs;	/* back link			*/
643 	struct ufsvfs	*vfs_next;	/* instance list link		*/
644 	struct ufsvfs	*vfs_wnext;	/* work list link		*/
645 	struct vnode	*vfs_root;	/* root vnode			*/
646 	struct buf	*vfs_bufp;	/* buffer containing superblock */
647 	struct vnode	*vfs_devvp;	/* block device vnode		*/
648 	ushort_t	vfs_lfflags;	/* Large files (set by mount)   */
649 	ushort_t	vfs_qflags;	/* QUOTA: filesystem flags	*/
650 	struct inode	*vfs_qinod;	/* QUOTA: pointer to quota file */
651 	uint_t		vfs_btimelimit;	/* QUOTA: block time limit	*/
652 	uint_t		vfs_ftimelimit;	/* QUOTA: file time limit	*/
653 	krwlock_t	vfs_dqrwlock;	/* QUOTA: protects quota fields */
654 	/*
655 	 * some fs local threads
656 	 */
657 	struct ufs_q	vfs_delete;	/* delayed inode delete */
658 	struct ufs_q	vfs_reclaim;	/* reclaim open, deleted files */
659 
660 	/*
661 	 * This is copied from the super block at mount time.
662 	 */
663 	int		vfs_nrpos;	/* # rotational positions */
664 	/*
665 	 * This lock protects cg's and super block pointed at by
666 	 * vfs_bufp->b_fs.  Locks contents of fs and cg's and contents
667 	 * of vfs_dio.
668 	 */
669 	kmutex_t	vfs_lock;
670 	struct ulockfs	vfs_ulockfs;	/* ufs lockfs support */
671 	uint_t		vfs_dio;	/* delayed io (_FIODIO) */
672 	uint_t		vfs_nointr;	/* disallow lockfs interrupts */
673 	uint_t		vfs_nosetsec;	/* disallow ufs_setsecattr */
674 	uint_t		vfs_syncdir;	/* synchronous local directory ops */
675 	uint_t		vfs_dontblock;	/* don't block on forced umount */
676 
677 	/*
678 	 * trans (logging ufs) stuff
679 	 */
680 	uint_t		vfs_domatamap;	/* set if matamap enabled */
681 	ulong_t		vfs_maxacl;	/* transaction stuff - max acl size */
682 	ulong_t		vfs_dirsize;	/* logspace for directory creation */
683 	ulong_t		vfs_avgbfree;	/* average free blks in cg (blkpref) */
684 	/*
685 	 * Some useful constants
686 	 */
687 	int	vfs_nindirshift;	/* calc. from fs_nindir */
688 	int	vfs_nindiroffset;	/* calc. from fs_nindir */
689 	int	vfs_ioclustsz;		/* bytes in read/write cluster */
690 	int	vfs_iotransz;		/* max device i/o transfer size  */
691 
692 	vfs_ufsfx_t	vfs_fsfx;	/* lock/fix-on-panic support */
693 	/*
694 	 * More useful constants
695 	 */
696 	int	vfs_minfrags;		/* calc. from fs_minfree */
697 	/*
698 	 * Force DirectIO on all files
699 	 */
700 	uint_t	vfs_forcedirectio;
701 	/*
702 	 * Deferred inode time related fields
703 	 */
704 	clock_t		vfs_iotstamp;	/* last I/O timestamp */
705 	uint_t		vfs_dfritime;	/* deferred inode time flags */
706 	/*
707 	 * Some more useful info
708 	 */
709 	dev_t		vfs_dev;	/* device mounted from */
710 	struct ml_unit	*vfs_log;	/* pointer to embedded log struct */
711 	uint_t		vfs_noatime;    /* disable inode atime updates */
712 	/*
713 	 * snapshot stuff
714 	 */
715 	void		*vfs_snapshot;	/* snapshot handle */
716 	/*
717 	 *  Controls logging "file system full" messages to messages file
718 	 */
719 	clock_t		vfs_lastwhinetime;
720 
721 	int 		vfs_nolog_si;	/* not logging summary info */
722 	int		vfs_validfs;	/* indicates mounted fs */
723 
724 	/*
725 	 * Additional information about vfs_delete above
726 	 */
727 	struct ufs_delq_info vfs_delete_info; /* what's on the delete queue */
728 } ufsvfs_t;
729 
/* shorthand on a struct ufsvfs for the fs structure in the superblock buffer */
730 #define	vfs_fs	vfs_bufp->b_un.b_fs
731 
732 /*
733  * values for vfs_validfs
734  * (an int member of struct ufsvfs; these are distinct states, not bit flags)
735  */
735 #define	UT_UNMOUNTED	0
736 #define	UT_MOUNTED	1
737 #define	UT_HLOCKING	2
738 
/*
 * Hash an inode number into the range [0, inohsz).
 * inohsz is guaranteed to be a power of 2, so masking with (inohsz - 1)
 * is equivalent to a modulo.  The argument is parenthesized before the
 * cast so that the whole expression passed in is converted, not just its
 * first operand.
 */
#define	INOHASH(ino)	(((int)(ino)) & (inohsz - 1))
741 
/*
 * Inode hash chain head.  The two views of the union let a bucket head be
 * linked either to other heads (ih_head) or to inodes (ih_chain); its
 * layout lines up with the i_chain[2] pair that must be first in struct
 * inode.
 */
742 union ihead {
743 	union	ihead	*ih_head[2];
744 	struct	inode	*ih_chain[2];
745 };
746 
/* NOTE(review): the variable `ihead' shares its name with the union tag */
747 extern	union	ihead	*ihead;
748 extern  kmutex_t	*ih_lock;
749 extern  int	*ih_ne;
750 extern	int	inohsz;
751 
752 extern	clock_t	ufs_iowait;
753 
754 #endif /* _KERNEL */
755 
756 /*
757  * ufs function prototypes
758  */
759 #if defined(_KERNEL) && !defined(_BOOT)
760 
/*
 * Inode and directory routines.  These prototypes are visible only when
 * _KERNEL is defined and _BOOT is not.
 */
761 extern	void	ufs_iinit(void);
762 extern	int	ufs_iget(struct vfs *, ino_t, struct inode **, cred_t *);
763 extern	int	ufs_iget_alloced(struct vfs *, ino_t, struct inode **,
764     cred_t *);
765 extern	void	ufs_reset_vnode(vnode_t *);
766 extern	void	ufs_iinactive(struct inode *);
767 extern	void	ufs_iupdat(struct inode *, int);
768 extern	int	ufs_rmidle(struct inode *);
769 extern	int	ufs_itrunc(struct inode *, u_offset_t, int, cred_t *);
770 extern	int	ufs_iaccess(void *, int, cred_t *);
771 extern  int	rdip(struct inode *, struct uio *, int, struct cred *);
772 extern  int	wrip(struct inode *, struct uio *, int, struct cred *);
773 
774 extern void	ufs_imark(struct inode *);
/* called by the ITIMES and ITIMES_NOLOCK macros */
775 extern void	ufs_itimes_nolock(struct inode *);
776 
777 extern	int	ufs_dirlook(struct inode *, char *, struct inode **,
778     cred_t *, int);
779 extern	int	ufs_direnter_cm(struct inode *, char *, enum de_op,
780     struct vattr *, struct inode **, cred_t *, int);
781 extern	int	ufs_direnter_lr(struct inode *, char *, enum de_op,
782     struct inode *, struct inode *, cred_t *, vnode_t **);
783 extern	int	ufs_dircheckpath(ino_t, struct inode *, struct inode *,
784     struct cred *);
785 extern	int	ufs_dirmakeinode(struct inode *, struct inode **,
786     struct vattr *, enum de_op, cred_t *);
787 extern	int	ufs_dirremove(struct inode *, char *, struct inode *,
788     vnode_t *, enum dr_op, cred_t *, vnode_t **);
789 extern	int	ufs_xattrdirempty(struct inode *, ino_t, cred_t *);
790 extern	int	blkatoff(struct inode *, off_t, char **, struct fbuf **);
791 
792 extern	void	sbupdate(struct vfs *);
792 extern	void	sbupdate(struct vfs *);
793 
/*
 * Inode and block allocation / free routines.
 *
 * NOTE(review): free() and alloc() are unprefixed global names, and
 * this free() prototype is incompatible with the C library's
 * free(void *).  The comment on the closing #endif of this section
 * says the region is compiled only when defined(_KERNEL) &&
 * !defined(_BOOT), so the clash would only matter if this region were
 * ever compiled together with <stdlib.h>.
 *
 * blkpref() takes a daddr32_t * while the other routines here use
 * daddr_t / daddr_t *; the difference is in the original prototypes.
 */
794 extern	int	ufs_ialloc(struct inode *, ino_t, mode_t, struct inode **,
795     cred_t *);
796 extern	void	ufs_ifree(struct inode *, ino_t, mode_t);
797 extern	void	free(struct inode *, daddr_t, off_t, int);
798 extern	int	alloc(struct inode *, daddr_t, int, daddr_t *, cred_t *);
799 extern	int	realloccg(struct inode *, daddr_t, daddr_t, int, int,
800     daddr_t *, cred_t *);
801 extern	int	ufs_freesp(struct vnode *, struct flock64 *, int, cred_t *);
802 extern	ino_t	dirpref(inode_t *);
803 extern	daddr_t	blkpref(struct inode *, daddr_t, int, daddr32_t *);
804 extern	daddr_t	contigpref(ufsvfs_t *, size_t);
805 
/*
 * Read/write on an inode: the enum uio_rw argument names the
 * direction, and the caddr_t / ssize_t / offset_t / enum uio_seg
 * arguments describe the transfer.
 * NOTE(review): the int * is presumably a residual count returned to
 * the caller -- confirm at the definition.
 */
806 extern	int	ufs_rdwri(enum uio_rw, int, struct inode *, caddr_t, ssize_t,
807 	offset_t, enum uio_seg, int *, cred_t *);
808 
/*
 * Block-map (bmap) routines.  All take the inode first and return int.
 * bmap_read() and bmap_find() take pointer arguments (daddr_t *,
 * int *, u_offset_t *), presumably used to return results -- confirm
 * at the definitions.
 */
809 extern	int	bmap_read(struct inode *, u_offset_t, daddr_t *, int *);
810 extern	int	bmap_write(struct inode *, u_offset_t, int, int, struct cred *);
811 extern	int	bmap_has_holes(struct inode *);
812 extern	int	bmap_find(struct inode *, boolean_t, u_offset_t *);
813 
/*
 * Add/remove pair operating on a struct ufsvfs; neither returns a
 * value.
 * NOTE(review): the container they add to / remove from is not visible
 * in this part of the header.
 */
814 extern	void	ufs_vfs_add(struct ufsvfs *);
815 extern	void	ufs_vfs_remove(struct ufsvfs *);
816 
/*
 * Miscellaneous routines: superblock/summary-info handling, syncing,
 * bad-block checks, the block-bitmap helpers (isblock(), setblock(),
 * clrblock(), isclrblock() -- all taking a struct fs *, a uchar_t *
 * map and a daddr_t), fbuf/page write-out, and inode
 * allocation/free.
 *
 * NOTE(review): isblock(), setblock(), clrblock(), isclrblock(),
 * fragacct() and skpc() are unprefixed global names.
 */
817 extern	void	ufs_sbwrite(struct ufsvfs *);
818 extern	void	ufs_update(int);
819 extern	int	ufs_getsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
820 extern	int	ufs_putsummaryinfo(dev_t, struct ufsvfs *, struct fs *);
821 extern	int	ufs_syncip(struct inode *, int, int, top_t);
822 extern	int	ufs_sync_indir(struct inode *);
823 extern	int	ufs_indirblk_sync(struct inode *, offset_t);
824 extern	int	ufs_badblock(struct inode *, daddr_t);
825 extern	int	ufs_indir_badblock(struct inode *, daddr32_t *);
826 extern	void	ufs_notclean(struct ufsvfs *);
827 extern	void	ufs_checkclean(struct vfs *);
828 extern	int	isblock(struct fs *, uchar_t *, daddr_t);
829 extern	void	setblock(struct fs *, uchar_t *, daddr_t);
830 extern	void	clrblock(struct fs *, uchar_t *, daddr_t);
831 extern	int	isclrblock(struct fs *, uchar_t *, daddr_t);
832 extern	void	fragacct(struct fs *, int, int32_t *, int);
833 extern	int	skpc(char, uint_t, char *);
834 extern	int	ufs_fbwrite(struct fbuf *, struct inode *);
835 extern	int	ufs_fbiwrite(struct fbuf *, struct inode *, daddr_t, long);
836 extern	int	ufs_putapage(struct vnode *, struct page *, u_offset_t *,
837 				size_t *, int, struct cred *);
838 extern inode_t	*ufs_alloc_inode(ufsvfs_t *, ino_t);
839 extern void	ufs_free_inode(inode_t *);
840 
841 /*
842  * Special-purpose inode routines.
 *
 * ufs_scan_inodes() takes a callback of type
 * int (*)(struct inode *, void *) together with a void * argument;
 * ufs_sync_inode() has exactly that callback signature.
 * NOTE(review): presumably the void * is passed through to the
 * callback -- confirm at the definition.
843  */
844 extern	void	ufs_setreclaim(struct inode *);
845 extern	int	ufs_scan_inodes(int, int (*)(struct inode *, void *), void *,
846 				struct ufsvfs *);
847 extern	int	ufs_sync_inode(struct inode *, void *);
848 extern	int	ufs_sticky_remove_access(struct inode *, struct inode *,
849     struct cred *);
850 /*
851  * Quota.
 *
 * chkiq() is the only prototype in this part of the header that names
 * any of its parameters: errp (char **) and lenp (size_t *).
 * NOTE(review): presumably these return an error-message buffer and
 * its length -- confirm at the definition, including who frees it.
852  */
853 extern	int	chkiq(struct ufsvfs *, int, struct inode *, uid_t, int,
854 			struct cred *, char **errp, size_t *lenp);
855 
856 /*
857  * UFS thread routines.
 *
 * The ufs_thread_init/start/exit/suspend/continue routines all operate
 * on a struct ufs_q.
 * NOTE(review): ufs_thread_start() declares its callback as
 * void (*)(), i.e. with an unspecified parameter list, so the compiler
 * cannot check the signature of the function passed in.
858  */
859 extern	void	ufs_thread_delete(struct vfs *);
860 extern	void	ufs_delete_drain(struct vfs *, int, int);
861 extern	void	ufs_delete(struct ufsvfs *, struct inode *, int);
862 extern	void	ufs_inode_cache_reclaim(void *);
863 extern	void	ufs_idle_drain(struct vfs *);
864 extern	void	ufs_idle_some(int);
865 extern	void	ufs_thread_idle(void);
866 extern	void	ufs_thread_reclaim(struct vfs *);
867 extern	void	ufs_thread_init(struct ufs_q *, int);
868 extern	void	ufs_thread_start(struct ufs_q *, void (*)(), struct vfs *);
869 extern	void	ufs_thread_exit(struct ufs_q *);
870 extern	void	ufs_thread_suspend(struct ufs_q *);
871 extern	void	ufs_thread_continue(struct ufs_q *);
872 extern	void	ufs_thread_hlock(void *);
873 extern	void	ufs_delete_init(struct ufsvfs *, int);
874 extern	void	ufs_delete_adjust_stats(struct ufsvfs *, struct statvfs64 *);
875 extern	void	ufs_delete_drain_wait(struct ufsvfs *, int);
876 
877 /*
878  * UFS lockfs routines.
 *
 * struct seg is forward-declared here so that the
 * ufs_lockfs_begin_getpage() prototype can name struct seg *.
 *
 * ufs_lockfs_begin() and ufs_lockfs_begin_getpage() take a
 * struct ulockfs **, and ufs_lockfs_end() takes a struct ulockfs *.
 * NOTE(review): presumably the begin routines hand back the ulockfs
 * that is later passed to ufs_lockfs_end() -- confirm at the
 * definitions.
879  */
880 struct seg;
881 extern int ufs_reconcile_fs(struct vfs *, struct ufsvfs *, int);
882 extern int ufs_quiesce(struct ulockfs *);
883 extern int ufs_flush(struct vfs *);
884 extern int ufs_fiolfs(struct vnode *, struct lockfs *, int);
885 extern int ufs__fiolfs(struct vnode *, struct lockfs *, int, int);
886 extern int ufs_fiolfss(struct vnode *, struct lockfs *);
887 extern int ufs_fioffs(struct vnode *, char *, struct cred *);
888 extern int ufs_check_lockfs(struct ufsvfs *, struct ulockfs *, ulong_t);
889 extern int ufs_lockfs_begin(struct ufsvfs *, struct ulockfs **, ulong_t);
890 extern int ufs_lockfs_begin_getpage(struct ufsvfs *, struct ulockfs **,
891 		struct seg *, int, uint_t *);
892 extern void ufs_lockfs_end(struct ulockfs *);
893 /*
894  * UFS ACL routines.
 *
 * ufs_acl_get() and ufs_acl_set() have identical parameter lists
 * (inode, vsecattr_t *, int, cred_t *).  The ufs_si_* routines work on
 * inodes, except ufs_si_free(), which takes an si_t *, and
 * ufs_si_cache_flush(), which takes a dev_t.
895  */
896 extern int ufs_si_inherit(struct inode *, struct inode *, o_mode_t, cred_t *);
897 extern void si_cache_init(void);
898 extern int ufs_si_load(struct inode *, cred_t *);
899 extern void ufs_si_del(struct inode *);
900 extern int ufs_acl_access(struct inode *, int, cred_t *);
901 extern void ufs_si_cache_flush(dev_t);
902 extern int ufs_si_free(si_t *, struct vfs *, cred_t *);
903 extern int ufs_acl_setattr(struct inode *, struct vattr *, cred_t *);
904 extern int ufs_acl_get(struct inode *, vsecattr_t *, int, cred_t *);
905 extern int ufs_acl_set(struct inode *, vsecattr_t *, int, cred_t *);
906 /*
907  * UFS directio routines.
 *
 * ufs_directio_init() is declared with an explicit (void) parameter
 * list so that it is a real prototype: in C an empty "()" leaves the
 * parameters unspecified and disables argument checking at call sites.
 *
 * ufs_directio_write() and ufs_directio_read() both end with an
 * int * parameter.
 * NOTE(review): presumably DIRECTIO_FAILURE / DIRECTIO_SUCCESS are the
 * values reported through that pointer -- confirm at the definitions.
908  */
909 extern void ufs_directio_init(void);
910 extern int ufs_directio_write(struct inode *, uio_t *, int, int, cred_t *,
911     int *);
912 extern int ufs_directio_read(struct inode *, uio_t *, cred_t *, int *);
913 #define	DIRECTIO_FAILURE	(0)
914 #define	DIRECTIO_SUCCESS	(1)
915 
916 /*
917  * ufs extensions for PXFS
918  */
919 
/*
 * Both routines take a vnode, an offset, a length, an fdbuffer_t,
 * flags and credentials.  They differ only in how the length is
 * passed: ufs_rdwr_data() takes it by value, ufs_alloc_data() through
 * a size_t *.
 * NOTE(review): presumably ufs_alloc_data() updates *len -- confirm at
 * the definition.
 */
920 int ufs_rdwr_data(vnode_t *vp, u_offset_t offset, size_t len, fdbuffer_t *fdb,
921     int flags, cred_t *cr);
922 int ufs_alloc_data(vnode_t *vp, u_offset_t offset, size_t *len, fdbuffer_t *fdb,
923     int flags, cred_t *cr);
924 
925 /*
926  * prototypes to support the forced unmount
927  */
928 
/*
 * Freeze/thaw pair.  ufs_freeze() returns nothing and takes the
 * ulockfs plus a struct lockfs; ufs_thaw() returns int and takes the
 * vfs, the ufsvfs and the ulockfs.
 */
929 void ufs_freeze(struct ulockfs *, struct lockfs *);
930 int ufs_thaw(struct vfs *, struct ufsvfs *, struct ulockfs *);
931 
932 /*
933  * extended attributes
934  */
935 
/*
 * ufs_xattrmkdir() and ufs_xattr_getattrdir() both take an
 * inode_t ** (presumably where the attribute-directory inode is
 * returned -- confirm at the definitions), an int and credentials.
 * ufs_unhook_shadow() takes two inodes and returns nothing.
 */
936 int ufs_xattrmkdir(inode_t *, inode_t **, int, struct cred *);
937 int ufs_xattr_getattrdir(vnode_t *, inode_t **, int, struct cred *);
938 void ufs_unhook_shadow(inode_t *, inode_t *);
939 
940 #endif	/* defined(_KERNEL) && !defined(_BOOT) */
941 
942 #ifdef	__cplusplus
943 }
944 #endif
945 
946 #endif	/* _SYS_FS_UFS_INODE_H */
947