xref: /illumos-gate/usr/src/uts/common/sys/fs/ufs_fs.h (revision 18d738ddd2d0f4a4b4d5b1939e627aacd420b59d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #ifndef	_SYS_FS_UFS_FS_H
40 #define	_SYS_FS_UFS_FS_H
41 
42 #include <sys/isa_defs.h>
43 #include <sys/types32.h>
44 #include <sys/t_lock.h>		/* for kmutex_t */
45 
46 #ifdef	__cplusplus
47 extern "C" {
48 #endif
49 
50 /*
51  * The following values are minor release values for UFS.
52  * The fs_version field in the superblock will equal one of them.
53  */
54 
/* Minor release values stored in fs_version for a UFS super block. */
#define	UFS_VERSION_MIN			0
#define	UFS_EFISTYLE4NONEFI_VERSION_2	2

/* Minor release values stored in fs_version for an MTB UFS super block. */
#define	MTB_UFS_VERSION_MIN		1
#define	MTB_UFS_VERSION_1		1
59 
60 /*
61  * Each disk drive contains some number of file systems.
62  * A file system consists of a number of cylinder groups.
63  * Each cylinder group has inodes and data.
64  *
65  * A file system is described by its super-block, which in turn
66  * describes the cylinder groups.  The super-block is critical
67  * data and is replicated in the first 10 cylinder groups and the
68  * last 10 cylinder groups to protect against
69  * catastrophic loss.  This is done at mkfs time and the critical
70  * super-block data does not change, so the copies need not be
71  * referenced further unless disaster strikes.
72  *
73  * For file system fs, the offsets of the various blocks of interest
74  * are given in the super block as:
75  *	[fs->fs_sblkno]		Super-block
76  *	[fs->fs_cblkno]		Cylinder group block
77  *	[fs->fs_iblkno]		Inode blocks
78  *	[fs->fs_dblkno]		Data blocks
79  * The beginning of cylinder group cg in fs, is given by
80  * the ``cgbase(fs, cg)'' macro.
81  *
82  * The first boot and super blocks are given in absolute disk addresses.
83  * The byte-offset forms are preferred, as they don't imply a sector size.
84  */
/* Sizes, in bytes, of the boot block and of the super block. */
#define	BBSIZE	8192
#define	SBSIZE	8192

/* Byte-offset forms (preferred: they do not imply a sector size). */
#define	BBOFF	((off_t)0)
#define	SBOFF	((off_t)(BBSIZE + BBOFF))

/* The same locations expressed in DEV_BSIZE units. */
#define	BBLOCK	((daddr32_t)0)
#define	SBLOCK	((daddr32_t)((BBSIZE / DEV_BSIZE) + BBLOCK))
91 
92 /*
93  * Addresses stored in inodes are capable of addressing fragments
94  * of `blocks'. File system blocks of at most size MAXBSIZE can
95  * be optionally broken into 2, 4, or 8 pieces, each of which is
96  * addressable; these pieces may be DEV_BSIZE, or some multiple of
97  * a DEV_BSIZE unit.
98  *
99  * Large files consist of exclusively large data blocks.  To avoid
100  * undue wasted disk space, the last data block of a small file may be
101  * allocated as only as many fragments of a large block as are
102  * necessary.  The file system format retains only a single pointer
103  * to such a fragment, which is a piece of a single large block that
104  * has been divided.  The size of such a fragment is determinable from
105  * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
106  *
107  * The file system records space availability at the fragment level;
108  * to determine block availability, aligned fragments are examined.
109  *
110  * The root inode is the root of the file system.
111  * Inode 0 can't be used for normal purposes and
112  * historically bad blocks were linked to inode 1,
113  * thus the root inode is 2. (inode 1 is no longer used for
114  * this purpose, however numerous dump tapes make this
115  * assumption, so we are stuck with it)
116  * The lost+found directory is given the next available
117  * inode when it is created by ``mkfs''.
118  */
/* Inode number of every file system root, and of lost+found after it. */
#define	UFSROOTINO	((ino_t)2)
#define	LOSTFOUNDINO	(UFSROOTINO + 1)

/*
 * Largest file offset UFS supports and the matching number of
 * file-size bits.  When a long long type is available the limit is
 * derived from the width of daddr32_t and DEV_BSHIFT; otherwise it
 * falls back to MAXOFF_T.
 */
#ifdef _LONGLONG_TYPE
#define	UFS_MAXOFFSET_T	\
	((1LL << ((NBBY * sizeof (daddr32_t)) + DEV_BSHIFT - 1)) - 1)
#define	UFS_FILESIZE_BITS	41
#else /* !_LONGLONG_TYPE */
#define	UFS_MAXOFFSET_T	MAXOFF_T
#define	UFS_FILESIZE_BITS	32
#endif /* _LONGLONG_TYPE */
129 
130 /*
131  * MINBSIZE is the smallest allowable block size.
132  * In order to ensure that it is possible to create files of size
133  * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
134  * MINBSIZE must be big enough to hold a cylinder group block,
135  * thus changes to (struct cg) must keep its size within MINBSIZE.
136  * Note that super blocks are always of size SBSIZE,
137  * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
138  */
#define	MINBSIZE	4096

/*
 * MAXMNTLEN is the space, in bytes, set aside in the super block
 * (fs_fsmnt) for the path name on which the file system is mounted.
 *
 * MAXCSBUFS limits the amount of summary information per file system;
 * it is currently parameterized for a maximum of two million cylinders.
 */
#define	MAXMNTLEN	512
#define	MAXCSBUFS	32

/* Label type codes. */
#define	LABEL_TYPE_VTOC		1
#define	LABEL_TYPE_EFI		2
#define	LABEL_TYPE_OTHER	3

/*
 * CHSLIMIT comes from the ANSI T13 ATA Specification: it is the largest
 * size (in sectors) an ATA disk may have while still being required to
 * provide CHS translation.  Larger disks are addressed purely by LBA,
 * which makes this a good cut-off for switching from disk-provided
 * geometry to the predefined defaults used in efi label disks.
 */
#define	CHSLIMIT	(63 * 256 * 1024)
165 
166 /*
167  * Per cylinder group information; summarized in blocks allocated
168  * from first cylinder group data blocks.  These blocks have to be
169  * read in from fs_csaddr (size fs_cssize) in addition to the
170  * super block.
171  *
172  * N.B. sizeof (struct csum) must be a power of two in order for
173  * the ``fs_cs'' macro to work (see below).
174  */
175 struct csum {
176 	int32_t	cs_ndir;	/* number of directories */
177 	int32_t	cs_nbfree;	/* number of free blocks */
178 	int32_t	cs_nifree;	/* number of free inodes */
179 	int32_t	cs_nffree;	/* number of free frags */
180 };
181 
182 /*
183  * In the 5.0 release, the file system state flag in the superblock (fs_clean)
184  * is now used. The value of fs_clean can be:
185  *	FSACTIVE	file system may have fsck inconsistencies
186  *	FSCLEAN		file system has successfully unmounted (implies
187  *			everything is ok)
188  *	FSSTABLE	No fsck inconsistencies, no guarantee on user data
189  *	FSBAD		file system is mounted from a partition that is
190  *			neither FSCLEAN nor FSSTABLE
191  *	FSSUSPEND	Clean flag processing is temporarily disabled
192  *	FSLOG		Logging file system
193  * Under this scheme, fsck can safely skip file systems that
194  * are FSCLEAN or FSSTABLE.  To provide additional safeguard,
195  * fs_clean information could be trusted only if
196  * fs_state == FSOKAY - fs_time, where FSOKAY is a constant
197  *
198  * Note: mount(2) will now return ENOSPC if fs_clean is neither FSCLEAN nor
199  * FSSTABLE, or fs_state is not valid.  The exceptions are the root or
200  * the read-only partitions
201  */
202 
203 /*
204  * Super block for a file system.
205  *
206  * Most of the data in the super block is read-only data and needs
207  * no explicit locking to protect it. Exceptions are:
208  *	fs_time
209  *	fs_optim
210  *	fs_cstotal
211  *	fs_fmod
212  *	fs_cgrotor
213  *	fs_flags   (largefiles flag - set when a file grows large)
214  * These fields require the use of fs->fs_lock.
215  */
/* Super block magic numbers (UFS and MTB UFS) and the FSOKAY constant. */
#define	FS_MAGIC	0x011954
#define	MTB_UFS_MAGIC	0xdecade
#define	FSOKAY		(0x7c269d38)

/*
 * Values of fs_clean.
 */
#define	FSACTIVE	((char)0x00)
#define	FSCLEAN		((char)0x01)
#define	FSSTABLE	((char)0x02)
#define	FSFIX		((char)0xfc)	/* being repaired while mounted */
#define	FSLOG		((char)0xfd)	/* logging fs */
#define	FSSUSPEND	((char)0xfe)	/* temporarily suspended */
#define	FSBAD		((char)0xff)	/* mounted !FSCLEAN and !FSSTABLE */

/*
 * Values of fs_flags.
 */
#define	FSLARGEFILES	((char)0x01)	/* largefiles exist on filesystem */
235 
/*
 * Super block for a file system.
 *
 * NOTE(review): the field order and the fixed-width types presumably
 * mirror the stored super block format; do not reorder, resize or
 * insert members without confirming every consumer.
 *
 * fs_state and fs_npsect trade places depending on _LITTLE_ENDIAN
 * (see the comment at the first of the two #ifdef sites).
 */
struct  fs {
	uint32_t fs_link;		/* linked list of file systems */
	uint32_t fs_rolled;		/* logging only: fs fully rolled */
	daddr32_t fs_sblkno;		/* addr of super-block in filesys */
	daddr32_t fs_cblkno;		/* offset of cyl-block in filesys */
	daddr32_t fs_iblkno;		/* offset of inode-blocks in filesys */
	daddr32_t fs_dblkno;		/* offset of first data after cg */
	int32_t	fs_cgoffset;		/* cylinder group offset in cylinder */
	int32_t	fs_cgmask;		/* used to calc mod fs_ntrak */
	time32_t fs_time;		/* last time written */
	int32_t	fs_size;		/* number of blocks in fs */
	int32_t	fs_dsize;		/* number of data blocks in fs */
	int32_t	fs_ncg;			/* number of cylinder groups */
	int32_t	fs_bsize;		/* size of basic blocks in fs */
	int32_t	fs_fsize;		/* size of frag blocks in fs */
	int32_t	fs_frag;		/* number of frags in a block in fs */
/* these are configuration parameters */
	int32_t	fs_minfree;		/* minimum percentage of free blocks */
	int32_t	fs_rotdelay;		/* num of ms for optimal next block */
	int32_t	fs_rps;			/* disk revolutions per second */
/* these fields can be computed from the others */
	int32_t	fs_bmask;		/* ``blkoff'' calc of blk offsets */
	int32_t	fs_fmask;		/* ``fragoff'' calc of frag offsets */
	int32_t	fs_bshift;		/* ``lblkno'' calc of logical blkno */
	int32_t	fs_fshift;		/* ``numfrags'' calc number of frags */
/* these are configuration parameters */
	int32_t	fs_maxcontig;		/* max number of contiguous blks */
	int32_t	fs_maxbpg;		/* max number of blks per cyl group */
/* these fields can be computed from the others */
	int32_t	fs_fragshift;		/* block to frag shift */
	int32_t	fs_fsbtodb;		/* fsbtodb and dbtofsb shift constant */
	int32_t	fs_sbsize;		/* actual size of super block */
	int32_t	fs_csmask;		/* csum block offset */
	int32_t	fs_csshift;		/* csum block number */
	int32_t	fs_nindir;		/* value of NINDIR */
	int32_t	fs_inopb;		/* value of INOPB */
	int32_t	fs_nspf;		/* value of NSPF */
/* yet another configuration parameter */
	int32_t	fs_optim;		/* optimization preference, see below */
/* these fields are derived from the hardware */
#ifdef _LITTLE_ENDIAN
	/*
	 * USL SVR4 compatibility
	 *
	 * There was a significant divergence here between Solaris and
	 * SVR4 for x86.  By swapping these two members in the superblock,
	 * we get read-only compatibility of SVR4 filesystems.  Otherwise
	 * there would be no compatibility.  This change was introduced
	 * during bootstrapping of Solaris on x86.  By making this ifdef'ed
	 * on byte order, we provide ongoing compatibility across all
	 * platforms with the same byte order, the highest compatibility
	 * that can be achieved.
	 */
	int32_t	fs_state;		/* file system state time stamp */
#else
	int32_t	fs_npsect;		/* # sectors/track including spares */
#endif
	int32_t fs_si;			/* summary info state - lufs only */
	int32_t	fs_trackskew;		/* sector 0 skew, per track */
/* a unique id for this filesystem (currently unused and unmaintained) */
/* In 4.3 Tahoe this space is used by fs_headswitch and fs_trkseek */
/* Neither of those fields is used in the Tahoe code right now but */
/* there could be problems if they are.				*/
	int32_t	fs_id[2];		/* file system id */
/* sizes determined by number of cylinder groups and their sizes */
	daddr32_t fs_csaddr;		/* blk addr of cyl grp summary area */
	int32_t	fs_cssize;		/* size of cyl grp summary area */
	int32_t	fs_cgsize;		/* cylinder group size */
/* these fields are derived from the hardware */
	int32_t	fs_ntrak;		/* tracks per cylinder */
	int32_t	fs_nsect;		/* sectors per track */
	int32_t	fs_spc;			/* sectors per cylinder */
/* this comes from the disk driver partitioning */
	int32_t	fs_ncyl;		/* cylinders in file system */
/* these fields can be computed from the others */
	int32_t	fs_cpg;			/* cylinders per group */
	int32_t	fs_ipg;			/* inodes per group */
	int32_t	fs_fpg;			/* blocks per group * fs_frag */
/* this data must be re-computed after crashes */
	struct	csum fs_cstotal;	/* cylinder summary information */
/* these fields are cleared at mount time */
	char	fs_fmod;		/* super block modified flag */
	char	fs_clean;		/* file system state flag */
	char	fs_ronly;		/* mounted read-only flag */
	char	fs_flags;		/* largefiles flag, etc. */
	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
/* these fields retain the current block allocation info */
	int32_t	fs_cgrotor;		/* last cg searched */
	/*
	 * The following used to be fs_csp[MAXCSBUFS]. It was not
	 * used anywhere except in old utilities.  We removed this
	 * in 5.6 and expect fs_u.fs_csp to be used instead.
	 * We no longer limit fs_cssize based on MAXCSBUFS.
	 *
	 * fs_csp_pad keeps the union MAXCSBUFS 32-bit words wide.
	 */
	union { 			/* fs_cs (csum) info */
		uint32_t fs_csp_pad[MAXCSBUFS];
		struct csum *fs_csp;
	} fs_u;
	int32_t	fs_cpc;			/* cyl per cycle in postbl */
	short	fs_opostbl[16][8];	/* old rotation block list head */
	int32_t	fs_sparecon[51];	/* reserved for future constants */
	int32_t fs_version;		/* minor version of ufs */
	int32_t	fs_logbno;		/* block # of embedded log */
	int32_t fs_reclaim;		/* reclaim open, deleted files */
	int32_t	fs_sparecon2;		/* reserved for future constant */
#ifdef _LITTLE_ENDIAN
	/* USL SVR4 compatibility: counterpart of the swap made above */
	int32_t	fs_npsect;		/* # sectors/track including spares */
#else
	int32_t	fs_state;		/* file system state time stamp */
#endif
	quad_t	fs_qbmask;		/* ~fs_bmask - for use with quad size */
	quad_t	fs_qfmask;		/* ~fs_fmask - for use with quad size */
	int32_t	fs_postblformat;	/* format of positional layout tables */
	int32_t	fs_nrpos;		/* number of rotational positions */
	int32_t	fs_postbloff;		/* (short) rotation block list head */
	int32_t	fs_rotbloff;		/* (uchar_t) blocks for each rotation */
	int32_t	fs_magic;		/* magic number */
	uchar_t	fs_space[1];		/* list of blocks for each rotation */
/* actually longer */
};
358 
359 /*
360  * values for fs_reclaim
361  */
/* fs_reclaim bits */
#define	FS_RECLAIM	(0x00000001)	/* run the reclaim-files thread */
#define	FS_RECLAIMING	(0x00000002)	/* running the reclaim-files thread */
#define	FS_CHECKCLEAN	(0x00000004)	/* checking for a clean file system */
#define	FS_CHECKRECLAIM	(0x00000008)	/* checking for a reclaimable file */

/* fs_rolled states */
#define	FS_PRE_FLAG	0	/* old system, prior to fs_rolled flag */
#define	FS_ALL_ROLLED	1
#define	FS_NEED_ROLL	2

/*
 * fs_si states (logging only).
 * "si" is the summary of the summary: a copy of the cylinder group
 * summary info held in an array for performance.  If it is found out of
 * date at mount time (FS_SI_BAD) it can be rebuilt by re-reading the
 * cylinder groups.
 */
#define	FS_SI_OK	0	/* on-disk summary info ok */
#define	FS_SI_BAD	1	/* out of date on-disk si */

/* fs_optim: optimization preference */
#define	FS_OPTTIME	0	/* minimize allocation time */
#define	FS_OPTSPACE	1	/* minimize disk fragmentation */

/* fs_postblformat: rotational layout table format types */
#define	FS_42POSTBLFMT		(-1)	/* 4.2BSD rotational table format */
#define	FS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
394 
395 /*
396  * Macros for access to superblock array structures
397  */
/*
 * fs_postbl: start of the rotational position list for cylinder cylno.
 * With the dynamic table format the list is found fs_postbloff bytes
 * into the super block, stepping nrpos shorts per cylinder; with any
 * other format the fixed fs_opostbl[] array is used.  The kernel
 * variant takes a ufsvfs pointer and uses its vfs_nrpos; the other
 * variant takes the super block itself and uses fs_nrpos.
 */
#ifdef _KERNEL
#define	fs_postbl(ufsvfsp, cylno) \
	(((ufsvfsp)->vfs_fs->fs_postblformat == FS_DYNAMICPOSTBLFMT) ? \
	    ((short *)((char *)(ufsvfsp)->vfs_fs + \
	    (ufsvfsp)->vfs_fs->fs_postbloff) + \
	    (cylno) * (ufsvfsp)->vfs_nrpos) : \
	    ((ufsvfsp)->vfs_fs->fs_opostbl[cylno]))
#else /* !_KERNEL */
#define	fs_postbl(fs, cylno) \
	(((fs)->fs_postblformat == FS_DYNAMICPOSTBLFMT) ? \
	    ((short *)((char *)(fs) + (fs)->fs_postbloff) + \
	    (cylno) * (fs)->fs_nrpos) : \
	    ((fs)->fs_opostbl[cylno]))
#endif /* _KERNEL */
413 
/*
 * fs_rotbl: start of the per-rotation block list.  With the dynamic
 * table format it is found fs_rotbloff bytes into the super block;
 * otherwise it is the trailing fs_space[] array.
 */
#define	fs_rotbl(fs) \
	(((fs)->fs_postblformat == FS_DYNAMICPOSTBLFMT) ? \
	    ((uchar_t *)((char *)(fs) + (fs)->fs_rotbloff)) : \
	    ((fs)->fs_space))
418 
/*
 * Convert cylinder group to base address of its global summary info.
 *
 * The macro is meant to follow a super block member access, e.g.
 * fs->fs_cs(fs, cg): it expands to an index into the single csum array
 * hanging off fs_u.fs_csp.  The fs argument is not used by the
 * expansion.
 *
 * N.B. sizeof (struct csum) is expected to be a power of two.
 */

#define	fs_cs(fs, indx) fs_u.fs_csp[(indx)]
427 
428 /*
429  * Cylinder group block for a file system.
430  *
431  * Writable fields in the cylinder group are protected by the associated
432  * super block lock fs->fs_lock.
433  */
#define	CG_MAGIC	0x090255
/*
 * Current cylinder group header.  The *off members are byte offsets,
 * measured from the start of this structure, at which the cg_* access
 * macros locate the maps; each comment gives the element type of the
 * map it points at.
 *
 * NOTE(review): member order and widths presumably mirror the stored
 * cylinder group format; confirm before changing anything here.
 */
struct	cg {
	uint32_t cg_link;		/* NOT USED linked list of cyl groups */
	int32_t	cg_magic;		/* magic number */
	time32_t cg_time;		/* time last written */
	int32_t	cg_cgx;			/* we are the cgx'th cylinder group */
	short	cg_ncyl;		/* number of cyl's this cg */
	short	cg_niblk;		/* number of inode blocks this cg */
	int32_t	cg_ndblk;		/* number of data blocks this cg */
	struct	csum cg_cs;		/* cylinder summary information */
	int32_t	cg_rotor;		/* position of last used block */
	int32_t	cg_frotor;		/* position of last used frag */
	int32_t	cg_irotor;		/* position of last used inode */
	int32_t	cg_frsum[MAXFRAG];	/* counts of available frags */
	int32_t	cg_btotoff;		/* (int32_t)block totals per cylinder */
	int32_t	cg_boff;		/* (short) free block positions */
	int32_t	cg_iusedoff;		/* (char) used inode map */
	int32_t	cg_freeoff;		/* (uchar_t) free block map */
	int32_t	cg_nextfreeoff;		/* (uchar_t) next available space */
	int32_t	cg_sparecon[16];	/* reserved for future use */
	uchar_t	cg_space[1];		/* space for cylinder group maps */
/* actually longer */
};
457 
458 /*
459  * Macros for access to cylinder group array structures
460  */
461 
/*
 * Each accessor below checks cg_magic.  When it equals CG_MAGIC the map
 * is located through the byte offset recorded in the header; otherwise
 * the block is treated as an old-format (struct ocg) cylinder group and
 * the fixed array of that layout is returned.
 */

/* Block totals per cylinder (int32_t array). */
#define	cg_blktot(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) ? \
	    ((int32_t *)((char *)(cgp) + (cgp)->cg_btotoff)) : \
	    (((struct ocg *)(cgp))->cg_btot))

/*
 * Free block positions for cylinder cylno (short array).  The kernel
 * variant takes a ufsvfs pointer and strides by vfs_nrpos; the other
 * variant takes the super block and strides by fs_nrpos.
 */
#ifdef _KERNEL
#define	cg_blks(ufsvfsp, cgp, cylno) \
	(((cgp)->cg_magic == CG_MAGIC) ? \
	    ((short *)((char *)(cgp) + (cgp)->cg_boff) + \
	    (cylno) * (ufsvfsp)->vfs_nrpos) : \
	    (((struct ocg *)(cgp))->cg_b[cylno]))
#else /* !_KERNEL */
#define	cg_blks(fs, cgp, cylno) \
	(((cgp)->cg_magic == CG_MAGIC) ? \
	    ((short *)((char *)(cgp) + (cgp)->cg_boff) + \
	    (cylno) * (fs)->fs_nrpos) : \
	    (((struct ocg *)(cgp))->cg_b[cylno]))
#endif /* _KERNEL */

/* Used inode map (char array). */
#define	cg_inosused(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) ? \
	    ((char *)((char *)(cgp) + (cgp)->cg_iusedoff)) : \
	    (((struct ocg *)(cgp))->cg_iused))

/* Free block map (uchar_t array). */
#define	cg_blksfree(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) ? \
	    ((uchar_t *)((char *)(cgp) + (cgp)->cg_freeoff)) : \
	    (((struct ocg *)(cgp))->cg_free))

/* True when the magic number matches in either layout. */
#define	cg_chkmagic(cgp) \
	((cgp)->cg_magic == CG_MAGIC || \
	    ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
494 
495 /*
496  * The following structure is defined
497  * for compatibility with old file systems.
498  */
/*
 * Old-format cylinder group, kept for compatibility with old file
 * systems.  The cg_* access macros fall back to the fixed-size arrays
 * of this layout when the cg_magic member of struct cg does not hold
 * CG_MAGIC; cg_chkmagic() also accepts a block whose magic number is
 * found at the cg_magic position of this layout.
 */
struct	ocg {
	uint32_t cg_link;		/* NOT USED linked list of cyl groups */
	uint32_t cg_rlink;		/* NOT USED incore cyl groups */
	time32_t cg_time;		/* time last written */
	int32_t	cg_cgx;			/* we are the cgx'th cylinder group */
	short	cg_ncyl;		/* number of cyl's this cg */
	short	cg_niblk;		/* number of inode blocks this cg */
	int32_t	cg_ndblk;		/* number of data blocks this cg */
	struct	csum cg_cs;		/* cylinder summary information */
	int32_t	cg_rotor;		/* position of last used block */
	int32_t	cg_frotor;		/* position of last used frag */
	int32_t	cg_irotor;		/* position of last used inode */
	int32_t	cg_frsum[8];		/* counts of available frags */
	int32_t	cg_btot[32];		/* block totals per cylinder */
	short	cg_b[32][8];		/* positions of free blocks */
	char	cg_iused[256];		/* used inode map */
	int32_t	cg_magic;		/* magic number */
	uchar_t	cg_free[1];		/* free block map */
/* actually longer */
};
519 
520 /*
521  * Turn frag offsets into disk block addresses.
522  * This maps frags to device size blocks.
523  * (In the names of these macros, "fsb" refers to "frags", not
524  * file system blocks.)
525  */
/*
 * fsbtodb: frag number -> device block number (shift left by
 * fs_fsbtodb, after widening the operand).
 * dbtofsb: device block number -> frag number (shift right).
 *
 * NOTE(review): this conditional tests KERNEL, whereas the other
 * kernel-only sections of this header test _KERNEL.  Confirm which
 * symbol is intended before relying on the daddr_t variant.
 */
#ifndef KERNEL
#define	fsbtodb(fs, b)	((diskaddr_t)(b) << (fs)->fs_fsbtodb)
#else /* KERNEL */
#define	fsbtodb(fs, b)	((daddr_t)(b) << (fs)->fs_fsbtodb)
#endif /* KERNEL */

#define	dbtofsb(fs, b)	((b) >> (fs)->fs_fsbtodb)
533 
534 /*
535  * Get the offset of the log, in either sectors, frags, or file system
536  * blocks.  The interpretation of the fs_logbno field depends on whether
537  * this is UFS or MTB UFS.  (UFS stores the value as sectors.  MTBUFS
538  * stores the value as frags.)
539  */
540 
541 #ifdef KERNEL
542 #define	logbtodb(fs, b)	((fs)->fs_magic == FS_MAGIC ? \
543 		(daddr_t)(b) : ((daddr_t)(b) << (fs)->fs_fsbtodb))
544 #else /* KERNEL */
545 #define	logbtodb(fs, b)	((fs)->fs_magic == FS_MAGIC ? \
546 		(diskaddr_t)(b) : ((diskaddr_t)(b) << (fs)->fs_fsbtodb))
547 #endif /* KERNEL */
548 #define	logbtofrag(fs, b)	((fs)->fs_magic == FS_MAGIC ? \
549 		(b) >> (fs)->fs_fsbtodb : (b))
550 #define	logbtofsblk(fs, b) ((fs)->fs_magic == FS_MAGIC ? \
551 		(b) >> ((fs)->fs_fsbtodb + (fs)->fs_fragshift) : \
552 		(b) >> (fs)->fs_fragshift)
553 
554 /*
555  * Cylinder group macros to locate things in cylinder groups.
556  * They calc file system addresses of cylinder group data structures.
557  */
/* First frag address of cylinder group c. */
#define	cgbase(fs, c)	((daddr32_t)((fs)->fs_fpg * (c)))

/*
 * cgbase() plus fs_cgoffset times the bits of c that fs_cgmask leaves
 * clear.
 */
#define	cgstart(fs, c) \
	((fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)) + cgbase(fs, c))

/* Addresses, within cylinder group c, of the items named on the right. */
#define	cgsblock(fs, c)	((fs)->fs_sblkno + cgstart(fs, c))	/* super blk */
#define	cgtod(fs, c)	((fs)->fs_cblkno + cgstart(fs, c))	/* cg block */
#define	cgimin(fs, c)	((fs)->fs_iblkno + cgstart(fs, c))	/* inode blk */
#define	cgdmin(fs, c)	((fs)->fs_dblkno + cgstart(fs, c))	/* 1st data */
570 
571 /*
572  * Macros for handling inode numbers:
573  *	inode number to file system block offset.
574  *	inode number to cylinder group number.
575  *	inode number to file system block address.
576  */
/* Index of inode x within the file system block that holds it. */
#define	itoo(fs, x)	((x) % (uint32_t)INOPB(fs))

/* Cylinder group that inode x belongs to. */
#define	itog(fs, x)	((x) / (uint32_t)(fs)->fs_ipg)

/*
 * File system address of the block holding inode x: the inode area of
 * its cylinder group plus the frag offset of the x'th inode's block.
 */
#define	itod(fs, x) \
	((daddr32_t)(blkstofrags((fs), \
	    (((x) % (ulong_t)(fs)->fs_ipg) / (ulong_t)INOPB(fs))) + \
	    cgimin(fs, itog(fs, x))))

/*
 * For file system address d: its cylinder group number (dtog) and its
 * offset within that cylinder group (dtogd).
 */
#define	dtog(fs, d)	((d) / (fs)->fs_fpg)
#define	dtogd(fs, d)	((d) % (fs)->fs_fpg)
591 
592 /*
593  * Extract the bits for a block from a map.
594  * Compute the cylinder and rotational position of a cyl block addr.
595  */
/*
 * blkmap: pull the fs_frag bits that start at bit position loc out of
 * the byte array map (the bits must lie within one byte).
 */
#define	blkmap(fs, map, loc) \
	((0xff >> (NBBY - (fs)->fs_frag)) & \
	((map)[(loc) / NBBY] >> ((loc) % NBBY)))

/* cbtocylno: cylinder number of cylinder-block address bno. */
#define	cbtocylno(fs, bno) \
	(((bno) * NSPF(fs)) / (fs)->fs_spc)
602 
/*
 * cbtorpos: rotational position of cylinder-block address bno.
 *
 * The sector offset of bno within its cylinder (bno * NSPF % fs_spc) is
 * reduced to an offset within the track (% fs_nsect) and then scaled
 * from sectors to rotational positions (* nrpos / fs_nsect).  The
 * kernel variant takes a ufsvfs pointer and uses its vfs_nrpos; the
 * other variant takes the super block and uses fs_nrpos.
 *
 * The whole expansion is parenthesized so that the macro can be used
 * as an operand of any operator (e.g. x % cbtorpos(...)) without the
 * trailing division re-associating with the surrounding expression.
 */
#ifdef _KERNEL
#define	cbtorpos(ufsvfsp, bno) \
	(((((bno) * NSPF((ufsvfsp)->vfs_fs) % (ufsvfsp)->vfs_fs->fs_spc) % \
	(ufsvfsp)->vfs_fs->fs_nsect) * \
	(ufsvfsp)->vfs_nrpos) / (ufsvfsp)->vfs_fs->fs_nsect)
#else
#define	cbtorpos(fs, bno) \
	(((((bno) * NSPF(fs) % (fs)->fs_spc) % \
	(fs)->fs_nsect) * \
	(fs)->fs_nrpos) / (fs)->fs_nsect)
#endif
614 
615 /*
616  * The following macros optimize certain frequently calculated
617  * quantities by using shifts and masks in place of divisions
618  * modulos and multiplications.
619  */
620 
621 /*
622  * This macro works for 40 bit offset support in ufs because
623  * this calculates offset in the block and therefore no loss of
624  * information while casting to int.
625  */
626 
/* blkoff: calculates (loc % fs->fs_bsize) */
#define	blkoff(fs, loc) \
	((int)(~(fs)->fs_bmask & (loc)))

/*
 * fragoff: calculates (loc % fs->fs_fsize).
 * Like blkoff, the result is an offset inside one unit, so the cast to
 * int loses nothing even with 40 bit offsets.
 */
#define	fragoff(fs, loc) \
	((int)(~(fs)->fs_fmask & (loc)))

/*
 * lblkno: calculates (loc / fs->fs_bsize).
 * The cast to int32_t loses no information because the number of
 * logical blocks in the file system is limited to what fits in an
 * int32_t anyway.
 */
#define	lblkno(fs, loc) \
	((int32_t)((loc) >> (fs)->fs_bshift))

/*
 * numfrags: calculates (loc / fs->fs_fsize).
 * The same argument as for lblkno applies to the cast.
 */
#define	numfrags(fs, loc) \
	((int32_t)((loc) >> (fs)->fs_fshift))
652 
653 /*
654  * Size can be a 64-bit value and therefore we sign extend fs_bmask
655  * to a 64-bit value too so that the higher 32 bits are masked
656  * properly. Note that the type of fs_bmask has to be signed. Otherwise
657  * compiler will set the higher 32 bits as zero and we don't want
658  * this to happen.
659  */
660 
/*
 * blkroundup: calculates roundup(size, fs->fs_bsize).
 * The signed mask is widened to offset_t so that its upper bits are
 * set and a 64-bit size keeps its high half.
 */
#define	blkroundup(fs, size) \
	((offset_t)(fs)->fs_bmask & ((size) + (fs)->fs_bsize - 1))

/*
 * fragroundup: calculates roundup(size, fs->fs_fsize), widening
 * fs_fmask in the same way.
 */
#define	fragroundup(fs, size) \
	((offset_t)(fs)->fs_fmask & ((size) + (fs)->fs_fsize - 1))
670 
671 /*
672  * frags cannot exceed 32-bit value since we only support 40bit sizes.
673  */
674 
/* fragstoblks: calculates (frags / fs->fs_frag) */
#define	fragstoblks(fs, frags) \
	((frags) >> (fs)->fs_fragshift)

/* blkstofrags: calculates (blks * fs->fs_frag) */
#define	blkstofrags(fs, blks) \
	((blks) << (fs)->fs_fragshift)

/* fragnum: calculates (fsb % fs->fs_frag) */
#define	fragnum(fs, fsb) \
	(((fs)->fs_frag - 1) & (fsb))

/* blknum: calculates rounddown(fsb, fs->fs_frag) */
#define	blknum(fs, fsb) \
	((fsb) & ~((fs)->fs_frag - 1))
686 
687 /*
688  * Determine the number of available frags given a
689  * percentage to hold in reserve
690  */
/*
 * freespace: total free frags (the free-frag count plus the free-block
 * count converted to frags) minus the reserve held in vfs_minfrags.
 */
#define	freespace(fs, ufsvfsp) \
	(((fs)->fs_cstotal.cs_nffree + \
	blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree)) - \
	(ufsvfsp)->vfs_minfrags)
694 
695 /*
696  * Determining the size of a file block in the file system.
697  */
698 
/*
 * blksize: size in bytes of logical block lbn of in-core inode ip.
 * Only a direct block (lbn < NDADDR) that the file ends inside can be
 * shorter than fs_bsize; its size is the in-block tail of i_size
 * rounded up to whole frags.  Every other block is a full fs_bsize.
 */
#define	blksize(fs, ip, lbn) \
	(((lbn) < NDADDR && \
	(ip)->i_size < ((offset_t)((lbn) + 1) << (fs)->fs_bshift)) \
	    ? (fragroundup(fs, blkoff(fs, (ip)->i_size))) \
	    : (fs)->fs_bsize)

/* dblksize: the same computation for an inode whose size is di_size. */
#define	dblksize(fs, dip, lbn) \
	(((lbn) < NDADDR && \
	(dip)->di_size < ((offset_t)((lbn) + 1) << (fs)->fs_bshift)) \
	    ? (fragroundup(fs, blkoff(fs, (dip)->di_size))) \
	    : (fs)->fs_bsize)
710 
711 /*
712  * Number of disk sectors per block; assumes DEV_BSIZE byte sector size.
713  */
/* Disk sectors per frag (NSPF) and per block (NSPB). */
#define	NSPF(fs)	((fs)->fs_nspf)
#define	NSPB(fs)	(NSPF(fs) << (fs)->fs_fragshift)

/*
 * INOPB is the number of inodes in a secondary storage block;
 * INOPF is that count shifted down by fs_fragshift.
 */
#define	INOPB(fs)	((fs)->fs_inopb)
#define	INOPF(fs)	(INOPB(fs) >> (fs)->fs_fragshift)

/*
 * NINDIR is the number of indirects in a file system block.
 */
#define	NINDIR(fs)	((fs)->fs_nindir)
727 
728 /*
729  * bit map related macros
730  */
/* Byte of map a that holds bit i. */
#define	bitloc(a, i)	((a)[(i) / NBBY])
/* Set / clear bit i of map a. */
#define	setbit(a, i)	((a)[(i) / NBBY] |= 1 << ((i) % NBBY))
#define	clrbit(a, i)	((a)[(i) / NBBY] &= ~(1 << ((i) % NBBY)))
/* Non-zero when bit i is set; isclr yields 1 when it is clear. */
#define	isset(a, i)	((a)[(i) / NBBY] & (1 << ((i) % NBBY)))
#define	isclr(a, i)	(isset(a, i) == 0)
736 
/*
 * getfs: super block of the file system mounted on vfsp.
 *
 * Follows vfsp->vfs_data (a struct ufsvfs) to its vfs_bufp buffer and
 * returns that buffer's b_un.b_addr as a struct fs pointer.
 *
 * The argument is parenthesized so that any pointer expression, e.g.
 * getfs(&vfs) or getfs(cond ? a : b), binds as a whole before the
 * member access is applied.
 */
#define	getfs(vfsp) \
	((struct fs *)((struct ufsvfs *)(vfsp)->vfs_data)->vfs_bufp->b_un.b_addr)
739 
/* Argument handed to delay() between two attempts at the lock. */
#define	RETRY_LOCK_DELAY 1

/*
 * Macros to test and acquire i_rwlock.
 *
 * Some vnops take the target directory's i_rwlock after calling
 * ufs_lockfs_begin, but in many other operations (like ufs_readdir)
 * VOP_RWLOCK is called by the filesystem independent code before the
 * file system operation, so the order is reversed: i_rwlock first,
 * ufs_lockfs_begin second.  That is harmless while ufs_lockfs_begin is
 * only a VOP counter, but once ufs_quiesce sets the SLOCK bit it is a
 * synchronizing object and the two orders can deadlock.  Hence
 * rw_tryenter is used instead of rw_enter.
 *
 * ufs_tryirwlock(lock, mode, label) expands to a block that expects the
 * variables `indeadlock' and `ulp' to be in scope at the point of use.
 * It clears indeadlock and tries the lock.  On failure it either sets
 * indeadlock to 1 (ulp is non-NULL and has SLOCK set) or delays and
 * retries from `label'.  When indeadlock is left set the lock is NOT
 * held; the caller is expected to call ufs_lockfs_end and restart the
 * operation.
 */

#define	ufs_tryirwlock(lock, mode, label) \
{\
	indeadlock = 0;\
label:\
	if (rw_tryenter(lock, mode) == 0) {\
		if (!ulp || !ULOCKFS_IS_SLOCK(ulp)) {\
			delay(RETRY_LOCK_DELAY);\
			goto label;\
		}\
		indeadlock = 1;\
	}\
}
770 
771 /*
772  * The macro ufs_tryirwlock_trans is used in functions which call
773  * TRANS_BEGIN_CSYNC and ufs_lockfs_begin, hence the need to call
774  * TRANS_END_CSYNC and ufs_lockfs_end.
775  */
776 
/*
 * ufs_tryirwlock_trans(lock, mode, transmode, label): same try/retry
 * loop as ufs_tryirwlock, for callers that are inside TRANS_BEGIN_CSYNC
 * and ufs_lockfs_begin.  When the lock cannot be taken and ulp has
 * SLOCK set, the macro itself ends the transaction and the lockfs
 * section before setting indeadlock.  Besides `indeadlock' and `ulp' it
 * expects `ufsvfsp', `error', `issync' and `trans_size' to be in scope
 * at the point of use.
 */
#define	ufs_tryirwlock_trans(lock, mode, transmode, label) \
{\
	indeadlock = 0;\
label:\
	if (rw_tryenter(lock, mode) == 0) {\
		if (!ulp || !ULOCKFS_IS_SLOCK(ulp)) {\
			delay(RETRY_LOCK_DELAY);\
			goto label;\
		}\
		TRANS_END_CSYNC(ufsvfsp, error, issync,\
			transmode, trans_size);\
		ufs_lockfs_end(ulp);\
		indeadlock = 1;\
	}\
}
794 
795 #ifdef	__cplusplus
796 }
797 #endif
798 
799 #endif	/* _SYS_FS_UFS_FS_H */
800