xref: /illumos-gate/usr/src/uts/common/sys/fs/ufs_fs.h (revision ba2be53024c0b999e74ba9adcd7d80fec5df8c57)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved	*/
28 
29 /*
30  * University Copyright- Copyright (c) 1982, 1986, 1988
31  * The Regents of the University of California
32  * All Rights Reserved
33  *
34  * University Acknowledgment- Portions of this document are derived from
35  * software developed by the University of California, Berkeley, and its
36  * contributors.
37  */
38 
39 #ifndef	_SYS_FS_UFS_FS_H
40 #define	_SYS_FS_UFS_FS_H
41 
42 #pragma ident	"%Z%%M%	%I%	%E% SMI"
43 
44 #include <sys/isa_defs.h>
45 #include <sys/types32.h>
46 #include <sys/t_lock.h>		/* for kmutex_t */
47 
48 #ifdef	__cplusplus
49 extern "C" {
50 #endif
51 
52 /*
53  * The following values are minor release values for UFS.
54  * The fs_version field in the superblock will equal one of them.
55  */
56 
/* Possible fs_version values, listed in ascending numeric order. */
#define	UFS_VERSION_MIN			0
#define	MTB_UFS_VERSION_MIN		1
#define	MTB_UFS_VERSION_1		1
#define	UFS_EFISTYLE4NONEFI_VERSION_2	2
61 
62 /*
63  * Each disk drive contains some number of file systems.
64  * A file system consists of a number of cylinder groups.
65  * Each cylinder group has inodes and data.
66  *
67  * A file system is described by its super-block, which in turn
68  * describes the cylinder groups.  The super-block is critical
69  * data and is replicated in the first 10 cylinder groups and the
70  * last 10 cylinder groups to protect against
71  * catastrophic loss.  This is done at mkfs time and the critical
72  * super-block data does not change, so the copies need not be
73  * referenced further unless disaster strikes.
74  *
75  * For file system fs, the offsets of the various blocks of interest
76  * are given in the super block as:
77  *	[fs->fs_sblkno]		Super-block
78  *	[fs->fs_cblkno]		Cylinder group block
79  *	[fs->fs_iblkno]		Inode blocks
80  *	[fs->fs_dblkno]		Data blocks
81  * The beginning of cylinder group cg in fs, is given by
82  * the ``cgbase(fs, cg)'' macro.
83  *
84  * The first boot and super blocks are given in absolute disk addresses.
85  * The byte-offset forms are preferred, as they don't imply a sector size.
86  */
/* Sizes of the boot block area and of a super-block. */
#define	BBSIZE		8192
#define	SBSIZE		8192

/* Byte offsets: the super-block follows directly after the boot block. */
#define	BBOFF		((off_t)0)
#define	SBOFF		((off_t)(BBOFF + BBSIZE))

/* The same two locations expressed in DEV_BSIZE units. */
#define	BBLOCK		((daddr32_t)0)
#define	SBLOCK		((daddr32_t)(BBLOCK + BBSIZE / DEV_BSIZE))
93 
94 /*
95  * Addresses stored in inodes are capable of addressing fragments
96  * of `blocks'. File system blocks of at most size MAXBSIZE can
97  * be optionally broken into 2, 4, or 8 pieces, each of which is
98  * addressable; these pieces may be DEV_BSIZE, or some multiple of
99  * a DEV_BSIZE unit.
100  *
101  * Large files consist of exclusively large data blocks.  To avoid
102  * undue wasted disk space, the last data block of a small file may be
103  * allocated as only as many fragments of a large block as are
104  * necessary.  The file system format retains only a single pointer
105  * to such a fragment, which is a piece of a single large block that
106  * has been divided.  The size of such a fragment is determinable from
107  * information in the inode, using the ``blksize(fs, ip, lbn)'' macro.
108  *
109  * The file system records space availability at the fragment level;
110  * to determine block availability, aligned fragments are examined.
111  *
112  * The root inode is the root of the file system.
113  * Inode 0 can't be used for normal purposes and
114  * historically bad blocks were linked to inode 1,
115  * thus the root inode is 2. (inode 1 is no longer used for
116  * this purpose, however numerous dump tapes make this
117  * assumption, so we are stuck with it)
118  * The lost+found directory is given the next available
119  * inode when it is created by ``mkfs''.
120  */
#define	UFSROOTINO	((ino_t)2)		/* i number of all roots */
#define	LOSTFOUNDINO	(UFSROOTINO + 1)	/* inode right after the root */

/*
 * Largest file offset and the number of bits in a file size.
 * With a long long type available the limit is 2^(bits in a daddr32_t +
 * DEV_BSHIFT - 1) - 1; the shift count is parenthesized to make the
 * precedence explicit.
 */
#ifdef _LONGLONG_TYPE
#define	UFS_MAXOFFSET_T	\
	((1LL << (NBBY * sizeof (daddr32_t) + DEV_BSHIFT - 1)) - 1)
#define	UFS_FILESIZE_BITS	41
#else
#define	UFS_MAXOFFSET_T	MAXOFF_T
#define	UFS_FILESIZE_BITS	32
#endif /* _LONGLONG_TYPE */
131 
132 /*
133  * MINBSIZE is the smallest allowable block size.
134  * In order to ensure that it is possible to create files of size
135  * 2^32 with only two levels of indirection, MINBSIZE is set to 4096.
136  * MINBSIZE must be big enough to hold a cylinder group block,
137  * thus changes to (struct cg) must keep its size within MINBSIZE.
138  * Note that super blocks are always of size SBSIZE,
139  * and that both SBSIZE and MAXBSIZE must be >= MINBSIZE.
140  */
141 #define	MINBSIZE	4096	/* smallest allowable block size */
142 
143 /*
144  * The path name on which the file system is mounted is maintained
145  * in fs_fsmnt. MAXMNTLEN defines the amount of space allocated in
146  * the super block for this name.
147  * The limit on the amount of summary information per file system
148  * is defined by MAXCSBUFS. It is currently parameterized for a
149  * maximum of two million cylinders.
150  */
151 #define	MAXMNTLEN 512	/* bytes reserved for fs_fsmnt[] in struct fs */
152 #define	MAXCSBUFS 32	/* element count of fs_u.fs_csp_pad[] */
153 
/*
 * Disk label type codes.
 * NOTE(review): nothing in this header references these; confirm the users.
 */
154 #define	LABEL_TYPE_VTOC		1
155 #define	LABEL_TYPE_EFI		2
156 #define	LABEL_TYPE_OTHER	3
157 
158 /*
159  * The following constant is taken from the ANSI T13 ATA Specification
160  * and defines the maximum size (in sectors) that an ATA disk can be
161  * and still has to provide CHS translation. For a disk above this
162  * size all sectors are to be accessed via their LBA address. This
163  * makes a good cut off value to move from disk provided geometry
164  * to the predefined defaults used in efi label disks.
165  */
166 #define	CHSLIMIT	(63 * 256 * 1024)	/* in sectors */
167 
168 /*
169  * Per cylinder group information; summarized in blocks allocated
170  * from first cylinder group data blocks.  These blocks have to be
171  * read in from fs_csaddr (size fs_cssize) in addition to the
172  * super block.
173  *
174  * N.B. sizeof (struct csum) must be a power of two in order for
175  * the ``fs_cs'' macro to work (see below).
176  */
/*
 * Summary counters: four int32_t values.  Used as the per-filesystem
 * total (fs_cstotal), inside each cylinder group (cg_cs), and as the
 * element type of the fs_u.fs_csp array.
 */
177 struct csum {
178 	int32_t	cs_ndir;	/* number of directories */
179 	int32_t	cs_nbfree;	/* number of free blocks */
180 	int32_t	cs_nifree;	/* number of free inodes */
181 	int32_t	cs_nffree;	/* number of free frags */
182 };
183 
184 /*
185  * In the 5.0 release, the file system state flag in the superblock (fs_clean)
186  * is now used. The value of fs_clean can be:
187  *	FSACTIVE	file system may have fsck inconsistencies
188  *	FSCLEAN		file system has successfully unmounted (implies
189  *			everything is ok)
190  *	FSSTABLE	No fsck inconsistencies, no guarantee on user data
191  *	FSBAD		file system is mounted from a partition that is
192  *			neither FSCLEAN or FSSTABLE
193  *	FSSUSPEND	Clean flag processing is temporarily disabled
194  *	FSLOG		Logging file system
195  * Under this scheme, fsck can safely skip file systems that
196  * are FSCLEAN or FSSTABLE.  To provide additional safeguard,
197  * fs_clean information could be trusted only if
198  * fs_state == FSOKAY - fs_time, where FSOKAY is a constant
199  *
200  * Note: mount(2) will now return ENOSPC if fs_clean is neither FSCLEAN nor
201  * FSSTABLE, or fs_state is not valid.  The exceptions are the root or
202  * the read-only partitions
203  */
204 
205 /*
206  * Super block for a file system.
207  *
208  * Most of the data in the super block is read-only data and needs
209  * no explicit locking to protect it. Exceptions are:
210  *	fs_time
211  *	fs_optim
212  *	fs_cstotal
213  *	fs_fmod
214  *	fs_cgrotor
215  *	fs_flags   (largefiles flag - set when a file grows large)
216  * These fields require the use of fs->fs_lock.
217  */
/* Super-block magic numbers. */
#define	FS_MAGIC	0x011954
#define	MTB_UFS_MAGIC	0xdecade

/* Constant used in the fs_state check (fs_state == FSOKAY - fs_time). */
#define	FSOKAY		(0x7c269d38)
/*  #define	FSOKAY		(0x7c269d38 + 3) */

/*
 * Values stored in fs_clean.
 */
#define	FSACTIVE	((char)0x00)
#define	FSCLEAN		((char)0x01)
#define	FSSTABLE	((char)0x02)
#define	FSFIX		((char)0xfc)	/* being repaired while mounted */
#define	FSLOG		((char)0xfd)	/* logging fs */
#define	FSSUSPEND	((char)0xfe)	/* temporarily suspended */
#define	FSBAD		((char)0xff)	/* mounted !FSCLEAN and !FSSTABLE */

/*
 * Values stored in fs_flags.
 */
#define	FSLARGEFILES	((char)0x01)	/* largefiles exist on filesystem */
237 
/*
 * struct fs: the UFS super-block.
 *
 * fs_state and fs_npsect trade places depending on _LITTLE_ENDIAN (see
 * the note inside the structure).  fs_space[] is declared with a single
 * element but is "actually longer"; fs_postbl() and fs_rotbl() find the
 * dynamic-format tables at byte offsets fs_postbloff and fs_rotbloff
 * from the start of the structure.
 */
238 struct  fs {
239 	uint32_t fs_link;		/* linked list of file systems */
240 	uint32_t fs_rolled;		/* logging only: fs fully rolled */
241 	daddr32_t fs_sblkno;		/* addr of super-block in filesys */
242 	daddr32_t fs_cblkno;		/* offset of cyl-block in filesys */
243 	daddr32_t fs_iblkno;		/* offset of inode-blocks in filesys */
244 	daddr32_t fs_dblkno;		/* offset of first data after cg */
245 	int32_t	fs_cgoffset;		/* cylinder group offset in cylinder */
246 	int32_t	fs_cgmask;		/* used to calc mod fs_ntrak */
247 	time32_t fs_time;		/* last time written */
248 	int32_t	fs_size;		/* number of blocks in fs */
249 	int32_t	fs_dsize;		/* number of data blocks in fs */
250 	int32_t	fs_ncg;			/* number of cylinder groups */
251 	int32_t	fs_bsize;		/* size of basic blocks in fs */
252 	int32_t	fs_fsize;		/* size of frag blocks in fs */
253 	int32_t	fs_frag;		/* number of frags in a block in fs */
254 /* these are configuration parameters */
255 	int32_t	fs_minfree;		/* minimum percentage of free blocks */
256 	int32_t	fs_rotdelay;		/* num of ms for optimal next block */
257 	int32_t	fs_rps;			/* disk revolutions per second */
258 /* these fields can be computed from the others */
259 	int32_t	fs_bmask;		/* ``blkoff'' calc of blk offsets */
260 	int32_t	fs_fmask;		/* ``fragoff'' calc of frag offsets */
261 	int32_t	fs_bshift;		/* ``lblkno'' calc of logical blkno */
262 	int32_t	fs_fshift;		/* ``numfrags'' calc number of frags */
263 /* these are configuration parameters */
264 	int32_t	fs_maxcontig;		/* max number of contiguous blks */
265 	int32_t	fs_maxbpg;		/* max number of blks per cyl group */
266 /* these fields can be computed from the others */
267 	int32_t	fs_fragshift;		/* block to frag shift */
268 	int32_t	fs_fsbtodb;		/* fsbtodb and dbtofsb shift constant */
269 	int32_t	fs_sbsize;		/* actual size of super block */
270 	int32_t	fs_csmask;		/* csum block offset */
271 	int32_t	fs_csshift;		/* csum block number */
272 	int32_t	fs_nindir;		/* value of NINDIR */
273 	int32_t	fs_inopb;		/* value of INOPB */
274 	int32_t	fs_nspf;		/* value of NSPF */
275 /* yet another configuration parameter */
276 	int32_t	fs_optim;		/* optimization preference, see below */
277 /* these fields are derived from the hardware */
278 	/* USL SVR4 compatibility */
279 #ifdef _LITTLE_ENDIAN
280 	/*
281 	 * USL SVR4 compatibility
282 	 *
283 	 * There was a significant divergence here between Solaris and
284 	 * SVR4 for x86.  By swapping these two members in the superblock,
285 	 * we get read-only compatibility of SVR4 filesystems.  Otherwise
286 	 * there would be no compatibility.  This change was introduced
287 	 * during bootstrapping of Solaris on x86.  By making this ifdef'ed
288 	 * on byte order, we provide ongoing compatibility across all
289 	 * platforms with the same byte order, the highest compatibility
290 	 * that can be achieved.
291 	 */
292 	int32_t	fs_state;		/* file system state time stamp */
293 #else
294 	int32_t	fs_npsect;		/* # sectors/track including spares */
295 #endif
296 	int32_t fs_si;			/* summary info state - lufs only */
297 	int32_t	fs_trackskew;		/* sector 0 skew, per track */
298 /* a unique id for this filesystem (currently unused and unmaintained) */
299 /* In 4.3 Tahoe this space is used by fs_headswitch and fs_trkseek */
300 /* Neither of those fields is used in the Tahoe code right now but */
301 /* there could be problems if they are.				*/
302 	int32_t	fs_id[2];		/* file system id */
303 /* sizes determined by number of cylinder groups and their sizes */
304 	daddr32_t fs_csaddr;		/* blk addr of cyl grp summary area */
305 	int32_t	fs_cssize;		/* size of cyl grp summary area */
306 	int32_t	fs_cgsize;		/* cylinder group size */
307 /* these fields are derived from the hardware */
308 	int32_t	fs_ntrak;		/* tracks per cylinder */
309 	int32_t	fs_nsect;		/* sectors per track */
310 	int32_t	fs_spc;			/* sectors per cylinder */
311 /* this comes from the disk driver partitioning */
312 	int32_t	fs_ncyl;		/* cylinders in file system */
313 /* these fields can be computed from the others */
314 	int32_t	fs_cpg;			/* cylinders per group */
315 	int32_t	fs_ipg;			/* inodes per group */
316 	int32_t	fs_fpg;			/* blocks per group * fs_frag */
317 /* this data must be re-computed after crashes */
318 	struct	csum fs_cstotal;	/* cylinder summary information */
319 /* these fields are cleared at mount time */
320 	char	fs_fmod;		/* super block modified flag */
321 	char	fs_clean;		/* file system state flag */
322 	char	fs_ronly;		/* mounted read-only flag */
323 	char	fs_flags;		/* largefiles flag, etc. */
324 	char	fs_fsmnt[MAXMNTLEN];	/* name mounted on */
325 /* these fields retain the current block allocation info */
326 	int32_t	fs_cgrotor;		/* last cg searched */
327 	/*
328 	 * The following used to be fs_csp[MAXCSBUFS]. It was not
329 	 * used anywhere except in old utilities.  We removed this
330 	 * in 5.6 and expect fs_u.fs_csp to be used instead.
331 	 * We no longer limit fs_cssize based on MAXCSBUFS.
332 	 */
333 	union { 			/* fs_cs (csum) info */
334 		uint32_t fs_csp_pad[MAXCSBUFS];
335 		struct csum *fs_csp;
336 	} fs_u;
337 	int32_t	fs_cpc;			/* cyl per cycle in postbl */
338 	short	fs_opostbl[16][8];	/* old rotation block list head */
339 	int32_t	fs_sparecon[51];	/* reserved for future constants */
340 	int32_t fs_version;		/* minor version of ufs */
341 	int32_t	fs_logbno;		/* block # of embedded log */
342 	int32_t fs_reclaim;		/* reclaim open, deleted files */
343 	int32_t	fs_sparecon2;		/* reserved for future constant */
344 #ifdef _LITTLE_ENDIAN
345 	/* USL SVR4 compatibility */
346 	int32_t	fs_npsect;		/* # sectors/track including spares */
347 #else
348 	int32_t	fs_state;		/* file system state time stamp */
349 #endif
350 	quad_t	fs_qbmask;		/* ~fs_bmask - for use with quad size */
351 	quad_t	fs_qfmask;		/* ~fs_fmask - for use with quad size */
352 	int32_t	fs_postblformat;	/* format of positional layout tables */
353 	int32_t	fs_nrpos;		/* number of rotational positions */
354 	int32_t	fs_postbloff;		/* (short) rotation block list head */
355 	int32_t	fs_rotbloff;		/* (uchar_t) blocks for each rotation */
356 	int32_t	fs_magic;		/* magic number */
357 	uchar_t	fs_space[1];		/* list of blocks for each rotation */
358 /* actually longer */
359 };
360 
361 /*
362  * values for fs_reclaim
363  */
/* fs_reclaim bits */
#define	FS_RECLAIM	(0x1)	/* run the reclaim-files thread */
#define	FS_RECLAIMING	(0x2)	/* running the reclaim-files thread */
#define	FS_CHECKCLEAN	(0x4)	/* checking for a clean file system */
#define	FS_CHECKRECLAIM	(0x8)	/* checking for a reclaimable file */

/* fs_rolled states */
#define	FS_PRE_FLAG	0	/* old system, prior to fs_rolled flag */
#define	FS_ALL_ROLLED	1
#define	FS_NEED_ROLL	2

/*
 * fs_si states (logging only).
 * "si" is the summary of the summary: a copy of the cylinder group
 * summary info kept in an array for performance.  If it is found to be
 * out of date at mount time (FS_SI_BAD) it can be rebuilt by re-reading
 * the cylinder groups.
 */
#define	FS_SI_OK	0	/* on-disk summary info ok */
#define	FS_SI_BAD	1	/* out of date on-disk si */

/* fs_optim: optimization preference */
#define	FS_OPTTIME	0	/* minimize allocation time */
#define	FS_OPTSPACE	1	/* minimize disk fragmentation */

/* fs_postblformat: rotational layout table format */
#define	FS_42POSTBLFMT		(-1)	/* 4.2BSD rotational table format */
#define	FS_DYNAMICPOSTBLFMT	1	/* dynamic rotational table format */
396 
397 /*
398  * Macros for access to superblock array structures
399  */
/*
 * fs_postbl: pointer (short *) to the rotational position list for
 * cylinder `cylno'.  With the dynamic table format the list lives
 * fs_postbloff bytes from the start of the super-block, cylno * nrpos
 * shorts in; otherwise the fixed fs_opostbl[cylno] row is used.
 * The kernel variant takes a ufsvfs pointer and reads vfs_nrpos from it;
 * the non-kernel variant takes the struct fs pointer and uses fs_nrpos.
 */
#ifdef _KERNEL
#define	fs_postbl(ufsvfsp, cylno) \
	(((ufsvfsp)->vfs_fs->fs_postblformat == FS_DYNAMICPOSTBLFMT) \
	    ? ((short *)((char *)(ufsvfsp)->vfs_fs + \
		(ufsvfsp)->vfs_fs->fs_postbloff) + \
		(cylno) * (ufsvfsp)->vfs_nrpos) \
	    : ((ufsvfsp)->vfs_fs->fs_opostbl[cylno]))
#else
#define	fs_postbl(fs, cylno) \
	(((fs)->fs_postblformat == FS_DYNAMICPOSTBLFMT) \
	    ? ((short *)((char *)(fs) + (fs)->fs_postbloff) + \
		(cylno) * (fs)->fs_nrpos) \
	    : ((fs)->fs_opostbl[cylno]))
#endif
415 
/*
 * fs_rotbl: pointer (uchar_t *) to the rotation block table.  With the
 * dynamic table format it is fs_rotbloff bytes from the start of the
 * super-block; otherwise it is fs_space.
 */
#define	fs_rotbl(fs) \
	(((fs)->fs_postblformat == FS_DYNAMICPOSTBLFMT) \
	    ? ((uchar_t *)((char *)(fs) + (fs)->fs_rotbloff)) \
	    : ((fs)->fs_space))
420 
421 /*
422  * Convert cylinder group to base address of its global summary info.
423  *
424  * N.B. This macro assumes that sizeof (struct csum) is a power of two.
425  * We just index off the first entry into one big array
426  */
427 
/*
 * The expansion is a bare member designator (fs_u.fs_csp[indx]), not a
 * complete expression, and the `fs' argument does not appear in it.
 * Presumably callers write it after a struct fs pointer, e.g.
 * fsp->fs_cs(fsp, cg) -- confirm against callers.
 */
428 #define	fs_cs(fs, indx) fs_u.fs_csp[(indx)]
429 
430 /*
431  * Cylinder group block for a file system.
432  *
433  * Writable fields in the cylinder group are protected by the associated
434  * super block lock fs->fs_lock.
435  */
/*
 * CG_MAGIC identifies a cylinder group block in the layout below.
 * The cg_*off members are byte offsets from the start of the structure;
 * the cg_blktot(), cg_blks(), cg_inosused() and cg_blksfree() macros add
 * them to the cg pointer to reach the corresponding table.  cg_space[]
 * is declared with one element but is "actually longer".
 */
436 #define	CG_MAGIC	0x090255
437 struct	cg {
438 	uint32_t cg_link;		/* NOT USED linked list of cyl groups */
439 	int32_t	cg_magic;		/* magic number */
440 	time32_t cg_time;		/* time last written */
441 	int32_t	cg_cgx;			/* we are the cgx'th cylinder group */
442 	short	cg_ncyl;		/* number of cyl's this cg */
443 	short	cg_niblk;		/* number of inode blocks this cg */
444 	int32_t	cg_ndblk;		/* number of data blocks this cg */
445 	struct	csum cg_cs;		/* cylinder summary information */
446 	int32_t	cg_rotor;		/* position of last used block */
447 	int32_t	cg_frotor;		/* position of last used frag */
448 	int32_t	cg_irotor;		/* position of last used inode */
449 	int32_t	cg_frsum[MAXFRAG];	/* counts of available frags */
450 	int32_t	cg_btotoff;		/* (int32_t)block totals per cylinder */
451 	int32_t	cg_boff;		/* (short) free block positions */
452 	int32_t	cg_iusedoff;		/* (char) used inode map */
453 	int32_t	cg_freeoff;		/* (uchar_t) free block map */
454 	int32_t	cg_nextfreeoff;		/* (uchar_t) next available space */
455 	int32_t	cg_sparecon[16];	/* reserved for future use */
456 	uchar_t	cg_space[1];		/* space for cylinder group maps */
457 /* actually longer */
458 };
459 
460 /*
461  * Macros for access to cylinder group array structures
462  */
463 
/*
 * Accessors for the cylinder group tables.  When cg_magic equals
 * CG_MAGIC the table is located at a byte offset from the start of the
 * cylinder group block; otherwise the block is viewed as a struct ocg
 * and its fixed-size array is used.
 */

/* block totals per cylinder (int32_t *) */
#define	cg_blktot(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) \
	    ? ((int32_t *)((char *)(cgp) + (cgp)->cg_btotoff)) \
	    : (((struct ocg *)(cgp))->cg_btot))

/*
 * free block positions for cylinder `cylno' (short *); the kernel
 * variant reads the rotational position count from the ufsvfs, the
 * non-kernel variant from the struct fs.
 */
#ifdef _KERNEL
#define	cg_blks(ufsvfsp, cgp, cylno) \
	(((cgp)->cg_magic == CG_MAGIC) \
	    ? ((short *)((char *)(cgp) + (cgp)->cg_boff) + \
		(cylno) * (ufsvfsp)->vfs_nrpos) \
	    : (((struct ocg *)(cgp))->cg_b[cylno]))
#else
#define	cg_blks(fs, cgp, cylno) \
	(((cgp)->cg_magic == CG_MAGIC) \
	    ? ((short *)((char *)(cgp) + (cgp)->cg_boff) + \
		(cylno) * (fs)->fs_nrpos) \
	    : (((struct ocg *)(cgp))->cg_b[cylno]))
#endif

/* used inode map (char *) */
#define	cg_inosused(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) \
	    ? ((char *)((char *)(cgp) + (cgp)->cg_iusedoff)) \
	    : (((struct ocg *)(cgp))->cg_iused))

/* free block map (uchar_t *) */
#define	cg_blksfree(cgp) \
	(((cgp)->cg_magic == CG_MAGIC) \
	    ? ((uchar_t *)((char *)(cgp) + (cgp)->cg_freeoff)) \
	    : (((struct ocg *)(cgp))->cg_free))

/* true when CG_MAGIC is present at either layout's cg_magic position */
#define	cg_chkmagic(cgp) \
	((cgp)->cg_magic == CG_MAGIC || \
	    ((struct ocg *)(cgp))->cg_magic == CG_MAGIC)
496 
497 /*
498  * The following structure is defined
499  * for compatibility with old file systems.
500  */
/*
 * Old cylinder group layout: the tables are fixed-size arrays inside the
 * structure and cg_magic sits after cg_iused.  The cg_* accessor macros
 * fall back to this view when struct cg's cg_magic is not CG_MAGIC, and
 * cg_chkmagic() tests cg_magic at this layout's position as well.
 */
501 struct	ocg {
502 	uint32_t cg_link;		/* NOT USED linked list of cyl groups */
503 	uint32_t cg_rlink;		/* NOT USED incore cyl groups */
504 	time32_t cg_time;		/* time last written */
505 	int32_t	cg_cgx;			/* we are the cgx'th cylinder group */
506 	short	cg_ncyl;		/* number of cyl's this cg */
507 	short	cg_niblk;		/* number of inode blocks this cg */
508 	int32_t	cg_ndblk;		/* number of data blocks this cg */
509 	struct	csum cg_cs;		/* cylinder summary information */
510 	int32_t	cg_rotor;		/* position of last used block */
511 	int32_t	cg_frotor;		/* position of last used frag */
512 	int32_t	cg_irotor;		/* position of last used inode */
513 	int32_t	cg_frsum[8];		/* counts of available frags */
514 	int32_t	cg_btot[32];		/* block totals per cylinder */
515 	short	cg_b[32][8];		/* positions of free blocks */
516 	char	cg_iused[256];		/* used inode map */
517 	int32_t	cg_magic;		/* magic number */
518 	uchar_t	cg_free[1];		/* free block map */
519 /* actually longer */
520 };
521 
522 /*
523  * Turn frag offsets into disk block addresses.
524  * This maps frags to device size blocks.
525  * (In the names of these macros, "fsb" refers to "frags", not
526  * file system blocks.)
527  */
/*
 * fsbtodb: frag number -> device block number (left shift by fs_fsbtodb).
 * dbtofsb: device block number -> frag number (right shift by fs_fsbtodb).
 *
 * NOTE(review): the selector below is KERNEL, while the other conditionals
 * in this header test _KERNEL.  Left as found because switching it would
 * change the cast type used in kernel builds -- confirm which is intended.
 */
#ifdef KERNEL
#define	fsbtodb(fs, b)	((daddr_t)(b) << (fs)->fs_fsbtodb)
#else /* KERNEL */
#define	fsbtodb(fs, b)	((diskaddr_t)(b) << (fs)->fs_fsbtodb)
#endif /* KERNEL */

#define	dbtofsb(fs, b)	((b) >> (fs)->fs_fsbtodb)
535 
536 /*
537  * Get the offset of the log, in either sectors, frags, or file system
538  * blocks.  The interpretation of the fs_logbno field depends on whether
539  * this is UFS or MTB UFS.  (UFS stores the value as sectors.  MTBUFS
540  * stores the value as frags.)
541  */
542 
/*
 * Convert the log location `b' to device blocks (logbtodb), frags
 * (logbtofrag) or file system blocks (logbtofsblk).  When fs_magic is
 * FS_MAGIC `b' is taken as a device block number; otherwise it is taken
 * as a frag number.
 *
 * NOTE(review): the selector below is KERNEL, while the other conditionals
 * in this header test _KERNEL -- confirm which is intended.
 */
#ifdef KERNEL
#define	logbtodb(fs, b) \
	((fs)->fs_magic != FS_MAGIC \
	    ? ((daddr_t)(b) << (fs)->fs_fsbtodb) \
	    : (daddr_t)(b))
#else /* KERNEL */
#define	logbtodb(fs, b) \
	((fs)->fs_magic != FS_MAGIC \
	    ? ((diskaddr_t)(b) << (fs)->fs_fsbtodb) \
	    : (diskaddr_t)(b))
#endif /* KERNEL */

#define	logbtofrag(fs, b) \
	((fs)->fs_magic != FS_MAGIC \
	    ? (b) \
	    : (b) >> (fs)->fs_fsbtodb)

#define	logbtofsblk(fs, b) \
	((fs)->fs_magic != FS_MAGIC \
	    ? (b) >> (fs)->fs_fragshift \
	    : (b) >> ((fs)->fs_fsbtodb + (fs)->fs_fragshift))
555 
556 /*
557  * Cylinder group macros to locate things in cylinder groups.
558  * They calc file system addresses of cylinder group data structures.
559  */
/* first frag of cylinder group c: c * fs_fpg */
#define	cgbase(fs, c)	((daddr32_t)((fs)->fs_fpg * (c)))

/* cgbase plus fs_cgoffset times the bits of c not set in fs_cgmask */
#define	cgstart(fs, c) \
	(cgbase(fs, c) + \
	    (fs)->fs_cgoffset * ((c) & ~((fs)->fs_cgmask)))

/* locations inside the group, each relative to cgstart */
#define	cgsblock(fs, c)	(cgstart(fs, c) + (fs)->fs_sblkno)	/* super blk */
#define	cgtod(fs, c)	(cgstart(fs, c) + (fs)->fs_cblkno)	/* cg block */
#define	cgimin(fs, c)	(cgstart(fs, c) + (fs)->fs_iblkno)	/* inode blk */
#define	cgdmin(fs, c)	(cgstart(fs, c) + (fs)->fs_dblkno)	/* 1st data */
572 
573 /*
574  * Macros for handling inode numbers:
575  *	inode number to file system block offset.
576  *	inode number to cylinder group number.
577  *	inode number to file system block address.
578  */
/* index of inode x within its file system block */
#define	itoo(fs, x)	((x) % (uint32_t)INOPB(fs))

/* cylinder group that holds inode x */
#define	itog(fs, x)	((x) / (uint32_t)(fs)->fs_ipg)

/*
 * file system address of the block holding inode x: the first inode
 * block of x's cylinder group plus the frag offset of the block within
 * the group's inode area
 */
#define	itod(fs, x) \
	((daddr32_t)(cgimin(fs, itog(fs, x)) + \
	    (blkstofrags((fs), \
		(((x) % (ulong_t)(fs)->fs_ipg) / (ulong_t)INOPB(fs))))))
586 
587 /*
588  * Give cylinder group number for a file system block.
589  * Give cylinder group block number for a file system block.
590  */
#define	dtog(fs, d)	((d) / (fs)->fs_fpg)	/* cylinder group of d */
#define	dtogd(fs, d)	((d) % (fs)->fs_fpg)	/* d relative to its group */
593 
594 /*
595  * Extract the bits for a block from a map.
596  * Compute the cylinder and rotational position of a cyl block addr.
597  */
/*
 * blkmap: the fs_frag bits of `map' that start at bit position `loc'.
 */
#define	blkmap(fs, map, loc) \
	(((map)[(loc) / NBBY] >> ((loc) % NBBY)) & \
	(0xff >> (NBBY - (fs)->fs_frag)))

/*
 * cbtocylno: cylinder number of cylinder-group-relative frag `bno'.
 */
#define	cbtocylno(fs, bno) \
	((bno) * NSPF(fs) / (fs)->fs_spc)

/*
 * cbtorpos: rotational position of cylinder-group-relative frag `bno'.
 * The kernel variant takes a ufsvfs pointer and uses vfs_nrpos; the
 * non-kernel variant takes the struct fs pointer and uses fs_nrpos.
 *
 * The whole expansion is parenthesized so the trailing division by
 * fs_nsect cannot associate with operators around the macro call
 * (e.g. 2 * cbtorpos(...) or x / cbtorpos(...)).
 */
#ifdef _KERNEL
#define	cbtorpos(ufsvfsp, bno) \
	(((((bno) * NSPF((ufsvfsp)->vfs_fs) % (ufsvfsp)->vfs_fs->fs_spc) % \
	(ufsvfsp)->vfs_fs->fs_nsect) * \
	(ufsvfsp)->vfs_nrpos) / (ufsvfsp)->vfs_fs->fs_nsect)
#else
#define	cbtorpos(fs, bno) \
	(((((bno) * NSPF(fs) % (fs)->fs_spc) % \
	(fs)->fs_nsect) * \
	(fs)->fs_nrpos) / (fs)->fs_nsect)
#endif
616 
617 /*
618  * The following macros optimize certain frequently calculated
619  * quantities by using shifts and masks in place of divisions
620  * modulos and multiplications.
621  */
622 
623 /*
624  * This macro works for 40 bit offset support in ufs because
625  * this calculates offset in the block and therefore no loss of
626  * information while casting to int.
627  */
628 
/*
 * blkoff(fs, loc) = loc % fs->fs_bsize
 * The int cast loses nothing: the result is an offset within a block.
 */
#define	blkoff(fs, loc) \
	((int)((loc) & ~(fs)->fs_bmask))

/*
 * fragoff(fs, loc) = loc % fs->fs_fsize
 * Safe for 40 bit offsets for the same reason as blkoff.
 */
#define	fragoff(fs, loc) \
	((int)((loc) & ~(fs)->fs_fmask))

/*
 * lblkno(fs, loc) = loc / fs->fs_bsize
 * The int32_t cast loses nothing because the number of logical blocks
 * in the file system is limited to what fits in an int32_t anyway.
 */
#define	lblkno(fs, loc) \
	((int32_t)((loc) >> (fs)->fs_bshift))

/*
 * numfrags(fs, loc) = loc / fs->fs_fsize
 * Same argument as for lblkno.
 */
#define	numfrags(fs, loc) \
	((int32_t)((loc) >> (fs)->fs_fshift))
654 
655 /*
656  * Size can be a 64-bit value and therefore we sign extend fs_bmask
657  * to a 64-bit value too so that the higher 32 bits are masked
658  * properly. Note that the type of fs_bmask has to be signed. Otherwise
659  * compiler will set the higher 32 bits as zero and we don't want
660  * this to happen.
661  */
662 
/* blkroundup(fs, size) = roundup(size, fs->fs_bsize) */
#define	blkroundup(fs, size) \
	(((size) + (fs)->fs_bsize - 1) & (offset_t)(fs)->fs_bmask)

/*
 * fragroundup(fs, size) = roundup(size, fs->fs_fsize)
 * fs_fmask is widened through offset_t in the same way as fs_bmask is
 * in blkroundup.
 */
#define	fragroundup(fs, size) \
	(((size) + (fs)->fs_fsize - 1) & (offset_t)(fs)->fs_fmask)
672 
673 /*
674  * frags cannot exceed 32-bit value since we only support 40bit sizes.
675  */
676 
/* fragstoblks(fs, frags) = frags / fs->fs_frag */
#define	fragstoblks(fs, frags) \
	((frags) >> (fs)->fs_fragshift)

/* blkstofrags(fs, blks) = blks * fs->fs_frag */
#define	blkstofrags(fs, blks) \
	((blks) << (fs)->fs_fragshift)

/* fragnum(fs, fsb) = fsb % fs->fs_frag */
#define	fragnum(fs, fsb) \
	((fsb) & ((fs)->fs_frag - 1))

/* blknum(fs, fsb) = rounddown(fsb, fs->fs_frag) */
#define	blknum(fs, fsb) \
	((fsb) & ~((fs)->fs_frag - 1))
688 
689 /*
690  * Determine the number of available frags given a
691  * percentage to hold in reserve
692  */
/*
 * Free frags (free blocks converted to frags, plus free frags) minus
 * the reserve held in ufsvfsp->vfs_minfrags.
 */
#define	freespace(fs, ufsvfsp) \
	(blkstofrags((fs), (fs)->fs_cstotal.cs_nbfree) + \
	    (fs)->fs_cstotal.cs_nffree - \
	    (ufsvfsp)->vfs_minfrags)
696 
697 /*
698  * Determining the size of a file block in the file system.
699  */
700 
/*
 * blksize: size of logical block lbn of in-core inode ip.  Only a
 * direct block (lbn < NDADDR) that the file ends inside can be shorter
 * than fs_bsize; it is then the file's tail rounded up to whole frags.
 */
#define	blksize(fs, ip, lbn) \
	(((lbn) < NDADDR && \
	(ip)->i_size < ((offset_t)((lbn) + 1) << (fs)->fs_bshift)) \
	    ? (fragroundup(fs, blkoff(fs, (ip)->i_size))) \
	    : (fs)->fs_bsize)

/*
 * dblksize: the same calculation using di_size of inode dip.
 */
#define	dblksize(fs, dip, lbn) \
	(((lbn) < NDADDR && \
	(dip)->di_size < ((offset_t)((lbn) + 1) << (fs)->fs_bshift)) \
	    ? (fragroundup(fs, blkoff(fs, (dip)->di_size))) \
	    : (fs)->fs_bsize)
712 
713 /*
714  * Number of disk sectors per block; assumes DEV_BSIZE byte sector size.
715  */
/* disk sectors per frag and per block */
#define	NSPF(fs)	((fs)->fs_nspf)
#define	NSPB(fs)	(NSPF(fs) << (fs)->fs_fragshift)

/*
 * inodes per secondary storage block and per frag
 */
#define	INOPB(fs)	((fs)->fs_inopb)
#define	INOPF(fs)	(INOPB(fs) >> (fs)->fs_fragshift)

/*
 * indirect pointers per file system block
 */
#define	NINDIR(fs)	((fs)->fs_nindir)
729 
730 /*
731  * bit map related macros
732  */
/*
 * `a' is indexed in units of NBBY bits; bit i lives in element i / NBBY
 * at position i % NBBY.  isset() yields the masked bit itself (zero or
 * non-zero), not a normalized 0/1.  Both arguments are evaluated more
 * than once in most of these, so avoid side effects in them.
 */
733 #define	bitloc(a, i)	((a)[(i)/NBBY])
734 #define	setbit(a, i)	((a)[(i)/NBBY] |= 1<<((i)%NBBY))
735 #define	clrbit(a, i)	((a)[(i)/NBBY] &= ~(1<<((i)%NBBY)))
736 #define	isset(a, i)	((a)[(i)/NBBY] & (1<<((i)%NBBY)))
737 #define	isclr(a, i)	(((a)[(i)/NBBY] & (1<<((i)%NBBY))) == 0)
738 
/*
 * getfs: follow a vfs pointer to its struct fs.  vfsp->vfs_data is
 * taken as a struct ufsvfs *, and that structure's
 * vfs_bufp->b_un.b_addr is returned cast to struct fs *.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (for example &vfs, or p + n) expands correctly.
 */
#define	getfs(vfsp) \
	((struct fs *)((struct ufsvfs *) \
	    (vfsp)->vfs_data)->vfs_bufp->b_un.b_addr)
741 
/* argument handed to delay() between rw_tryenter() attempts below */
742 #define	RETRY_LOCK_DELAY 1
743 
744 /*
745  * Macros to test and acquire i_rwlock:
746  * some vnops hold the target directory's i_rwlock after calling
747  * ufs_lockfs_begin but in many other operations (like ufs_readdir)
748  * VOP_RWLOCK is explicitly called by the filesystem independent code before
749  * calling the file system operation. In these cases the order is reversed
750  * (i.e i_rwlock is taken first and then ufs_lockfs_begin is called). This
751  * is fine as long as ufs_lockfs_begin acts as a VOP counter but with
752  * ufs_quiesce setting the SLOCK bit this becomes a synchronizing
753  * object which might lead to a deadlock. So we use rw_tryenter instead of
754  * rw_enter. If we fail to get this lock and find that SLOCK bit is set, we
755  * call ufs_lockfs_end and restart the operation.
756  */
757 
/*
 * ufs_tryirwlock(lock, mode, label)
 *
 * Expands to a brace-enclosed block that:
 *   - sets the caller's `indeadlock' to 0;
 *   - calls rw_tryenter(lock, mode); on success nothing more is done;
 *   - on failure, if the caller's `ulp' is non-NULL and
 *     ULOCKFS_IS_SLOCK(ulp) is true, sets indeadlock to 1 and falls out
 *     of the block WITHOUT holding the lock;
 *   - otherwise calls delay(RETRY_LOCK_DELAY) and jumps back to `label'
 *     to try again.
 *
 * Requirements on the call site: variables named `indeadlock' and `ulp'
 * must be in scope, and `label' is defined by the expansion, so it must
 * be unique within the enclosing function.  The caller is expected to
 * test indeadlock afterwards.  Because the expansion is a bare { } block
 * rather than do { } while (0), it is not safe as the unbraced body of
 * an if that has an else.
 */
758 #define	ufs_tryirwlock(lock, mode, label) \
759 {\
760 	indeadlock = 0;\
761 label:\
762 	if (!rw_tryenter(lock, mode))\
763 	{\
764 		if (ulp && ULOCKFS_IS_SLOCK(ulp)) {\
765 			indeadlock = 1;\
766 		} else {\
767 			delay(RETRY_LOCK_DELAY);\
768 			goto  label;\
769 		}\
770 	}\
771 }
772 
773 /*
774  * The macro ufs_tryirwlock_trans is used in functions which call
775  * TRANS_BEGIN_CSYNC and ufs_lockfs_begin, hence the need to call
776  * TRANS_END_CSYNC and ufs_lockfs_end.
777  */
778 
/*
 * ufs_tryirwlock_trans(lock, mode, transmode, label)
 *
 * Same retry loop as ufs_tryirwlock, except that on the SLOCK path it
 * first calls TRANS_END_CSYNC(ufsvfsp, error, issync, transmode,
 * trans_size) and ufs_lockfs_end(ulp), and only then sets indeadlock
 * to 1.  The lock is NOT held when indeadlock is 1.
 *
 * Requirements on the call site: `indeadlock', `ulp', `ufsvfsp',
 * `error', `issync' and `trans_size' must be in scope, and `label' is
 * defined by the expansion, so it must be unique within the enclosing
 * function.  As with ufs_tryirwlock, the expansion is a bare { } block.
 */
779 #define	ufs_tryirwlock_trans(lock, mode, transmode, label) \
780 {\
781 	indeadlock = 0;\
782 label:\
783 	if (!rw_tryenter(lock, mode))\
784 	{\
785 		if (ulp && ULOCKFS_IS_SLOCK(ulp)) {\
786 			TRANS_END_CSYNC(ufsvfsp, error, issync,\
787 				transmode, trans_size);\
788 			ufs_lockfs_end(ulp);\
789 			indeadlock = 1;\
790 		} else {\
791 			delay(RETRY_LOCK_DELAY);\
792 			goto  label;\
793 		}\
794 	}\
795 }
796 
797 #ifdef	__cplusplus
798 }
799 #endif
800 
801 #endif	/* _SYS_FS_UFS_FS_H */
802