xref: /titanic_41/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c (revision 5ebc22727c394636ca1111875be60eb4705d818f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 
43 /*
44  * The maximum supported file system size (in sectors) is the
45  * number of frags that can be represented in an int32_t field
46  * (INT_MAX) times the maximum number of sectors per frag.  Since
47  * the maximum frag size is MAXBSIZE, the maximum number of sectors
48  * per frag is MAXBSIZE/DEV_BSIZE.
49  */
50 #define	FS_MAX	(((diskaddr_t)INT_MAX) * (MAXBSIZE/DEV_BSIZE))
51 
52 /*
53  * make file system for cylinder-group style file systems
54  *
55  * usage:
56  *
57  *    mkfs [-F FSType] [-V] [-G [-P]] [-M dirname] [-m] [options]
58  *	[-o specific_options]  special size
59  *	[nsect ntrack bsize fsize cpg	minfree	rps nbpi opt apc rotdelay
60  *	  2     3      4     5     6	7	8   9	 10  11  12
61  *	nrpos maxcontig mtb]
62  *	13    14	15
63  *
64  *  where specific_options are:
65  *	N - no create
66  *	nsect - The number of sectors per track
67  *	ntrack - The number of tracks per cylinder
68  *	bsize - block size
69  *	fragsize - fragment size
70  *	cgsize - The number of disk cylinders per cylinder group.
71  * 	free - minimum free space
72  *	rps - rotational speed (rev/sec).
73  *	nbpi - number of data bytes per allocated inode
74  *	opt - optimization (space, time)
75  *	apc - number of alternates
76  *	gap - gap size
77  *	nrpos - number of rotational positions
78  *	maxcontig - maximum number of logical blocks that will be
79  *		allocated contiguously before inserting rotational delay
80  *	mtb - if "y", set up file system for eventual growth to over
81  *		a terabyte
82  * -P Do not grow the file system, but print on stdout the maximal
83  *    size in sectors to which the file system can be increased. The calculated
84  *    size is limited by the value provided by the operand size.
85  *
86  * Note that -P is a project-private interface and together with -G intended
87  * to be used only by the growfs script. It is therefore purposely not
88  * documented in the man page.
89  * The -P option is covered by PSARC case 2003/422.
90  */
91 
92 /*
93  * The following constants set the defaults used for the number
94  * of sectors/track (fs_nsect), and number of tracks/cyl (fs_ntrak).
95  *
96  *			NSECT		NTRAK
97  *	72MB CDC	18		9
98  *	30MB CDC	18		5
99  *	720KB Diskette	9		2
100  */
101 
102 #define	DFLNSECT	32
103 #define	DFLNTRAK	16
104 
105 /*
106  * The following two constants set the default block and fragment sizes.
107  * Both constants must be a power of 2 and meet the following constraints:
108  *	MINBSIZE <= DESBLKSIZE <= MAXBSIZE
109  *	DEV_BSIZE <= DESFRAGSIZE <= DESBLKSIZE
110  *	DESBLKSIZE / DESFRAGSIZE <= 8
111  */
112 #define	DESBLKSIZE	8192
113 #define	DESFRAGSIZE	1024
114 
115 /*
116  * The maximum number of cylinders in a group depends upon how much
117  * information can be stored on a single cylinder. The default is to
118  * use 16 cylinders per group.  This is effectively tradition - it was
119  * the largest value acceptable under SunOS 4.1.
120  */
121 #define	DESCPG		16	/* desired fs_cpg */
122 
123 /*
124  * MINFREE gives the minimum acceptable percentage of file system
125  * blocks which may be free. If the freelist drops below this level
126  * only the superuser may continue to allocate blocks. This may
127  * be set to 0 if no reserve of free blocks is deemed necessary,
128  * however throughput drops by fifty percent if the file system
129  * is run at between 90% and 100% full; thus the default value of
130  * fs_minfree is 10%. With 10% free space, fragmentation is not a
131  * problem, so we choose to optimize for time.
132  */
133 #define	MINFREE		10
134 #define	DEFAULTOPT	FS_OPTTIME
135 
136 /*
137  * ROTDELAY gives the minimum number of milliseconds to initiate
138  * another disk transfer on the same cylinder. It is no longer used
139  * and will always default to 0.
140  */
141 #define	ROTDELAY	0
142 
143 /*
144  * MAXBLKPG determines the maximum number of data blocks which are
145  * placed in a single cylinder group. The default is one indirect
146  * block worth of data blocks.
147  */
148 #define	MAXBLKPG(bsize)	((bsize) / sizeof (daddr32_t))
149 
150 /*
151  * Each file system has a number of inodes statically allocated.
152  * We allocate one inode slot per NBPI bytes, expecting this
153  * to be far more than we will ever need.
154  */
155 #define	NBPI		2048	/* Number Bytes Per Inode */
156 #define	MTB_NBPI	(MB)	/* Number Bytes Per Inode for multi-terabyte */
157 
158 /*
159  * Disks are assumed to rotate at 60HZ, unless otherwise specified.
160  */
161 #define	DEFHZ		60
162 
163 /*
164  * Cylinder group related limits.
165  *
166  * For each cylinder we keep track of the availability of blocks at different
167  * rotational positions, so that we can lay out the data to be picked
168  * up with minimum rotational latency.  NRPOS is the number of rotational
169  * positions which we distinguish.  With NRPOS 8 the resolution of our
170  * summary information is 2ms for a typical 3600 rpm drive.
171  */
172 #define	NRPOS		8	/* number distinct rotational positions */
173 
174 /*
175  * range_check "user_supplied" flag values.
176  */
177 #define	RC_DEFAULT	0
178 #define	RC_KEYWORD	1
179 #define	RC_POSITIONAL	2
180 
181 #ifndef	STANDALONE
182 #include	<stdio.h>
183 #include	<sys/mnttab.h>
184 #endif
185 
186 #include	<stdlib.h>
187 #include	<unistd.h>
188 #include	<malloc.h>
189 #include	<string.h>
190 #include	<strings.h>
191 #include	<ctype.h>
192 #include	<errno.h>
193 #include	<sys/param.h>
194 #include	<time.h>
195 #include	<sys/types.h>
196 #include	<sys/sysmacros.h>
197 #include	<sys/vnode.h>
198 #include	<sys/fs/ufs_fsdir.h>
199 #include	<sys/fs/ufs_inode.h>
200 #include	<sys/fs/ufs_fs.h>
201 #include	<sys/fs/ufs_log.h>
202 #include	<sys/mntent.h>
203 #include	<sys/filio.h>
204 #include	<limits.h>
205 #include	<sys/int_const.h>
206 #include	<signal.h>
207 #include	<sys/efi_partition.h>
208 #include	"roll_log.h"
209 
210 #define	bcopy(f, t, n)    (void) memcpy(t, f, n)
211 #define	bzero(s, n)	(void) memset(s, 0, n)
212 #define	bcmp(s, d, n)	memcmp(s, d, n)
213 
214 #define	index(s, r)	strchr(s, r)
215 #define	rindex(s, r)	strrchr(s, r)
216 
217 #include	<sys/stat.h>
218 #include	<sys/statvfs.h>
219 #include	<locale.h>
220 #include	<fcntl.h>
221 #include 	<sys/isa_defs.h>	/* for ENDIAN defines */
222 #include	<sys/vtoc.h>
223 
224 #include	<sys/dkio.h>
225 #include	<sys/asynch.h>
226 
227 extern offset_t	llseek();
228 extern char	*getfullblkname();
229 extern long	lrand48();
230 
231 extern int	optind;
232 extern char	*optarg;
233 
234 
235 /*
236  * The size of a cylinder group is calculated by CGSIZE. The maximum size
237  * is limited by the fact that cylinder groups are at most one block.
238  * Its size is derived from the size of the maps maintained in the
239  * cylinder group and the (struct cg) size.
240  */
241 #define	CGSIZE(fs) \
242 	/* base cg		*/ (sizeof (struct cg) + \
243 	/* blktot size	*/ (fs)->fs_cpg * sizeof (long) + \
244 	/* blks size	*/ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof (short) + \
245 	/* inode map	*/ howmany((fs)->fs_ipg, NBBY) + \
246 	/* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY))
247 
248 /*
249  * We limit the size of the inode map to be no more than a
250  * third of the cylinder group space, since we must leave at
251  * least an equal amount of space for the block map.
252  *
253  * N.B.: MAXIpG must be a multiple of INOPB(fs).
254  */
255 #define	MAXIpG(fs)	roundup((fs)->fs_bsize * NBBY / 3, INOPB(fs))
256 
257 /*
258  * Same as MAXIpG, but parameterized by the block size (b) and the
259  * cylinder group divisor (d), which is the reciprocal of the fraction of the
260  * cylinder group overhead block that is used for the inode map.  So for
261  * example, if d = 5, the macro's computation assumes that 1/5 of the
262  * cylinder group overhead block can be dedicated to the inode map.
263  */
264 #define	MAXIpG_B(b, d)	roundup((b) * NBBY / (d), (b) / sizeof (struct dinode))
265 
266 #define	UMASK		0755
267 #define	MAXINOPB	(MAXBSIZE / sizeof (struct dinode))
268 #define	POWEROF2(num)	(((num) & ((num) - 1)) == 0)
269 #define	MB		(1024*1024)
270 #define	BETWEEN(x, l, h)	((x) >= (l) && (x) <= (h))
271 
272 /*
273  * Sets the inode generation number.  Both inodes and dinodes are handled,
274  * so icp must be an icommon pointer.  NOTE: expansion already ends in ';'.
275  */
276 #define	IRANDOMIZE(icp)	(icp)->ic_gen = lrand48();
277 
278 /*
279  * Flags for number()
280  */
281 #define	ALLOW_PERCENT	0x01	/* allow trailing `%' on number */
282 #define	ALLOW_MS1	0x02	/* allow trailing `ms', state 1 */
283 #define	ALLOW_MS2	0x04	/* allow trailing `ms', state 2 */
284 #define	ALLOW_END_ONLY	0x08	/* must be at end of number & suffixes */
285 
286 #define	MAXAIO	1000	/* maximum number of outstanding I/O's we'll manage */
287 #define	BLOCK	1	/* block in aiowait */
288 #define	NOBLOCK	0	/* don't block in aiowait */
289 
290 #define	RELEASE 1	/* free an aio buffer after use */
291 #define	SAVE	0	/* don't free the buffer */
292 
293 typedef struct aio_trans {
294 	aio_result_t resultbuf;	/* result area for the async request */
295 	diskaddr_t bno;		/* presumably target block address; confirm */
296 	char *buffer;		/* data buffer associated with the request */
297 	int size;		/* presumably transfer size in bytes; confirm */
298 	int release;		/* presumably RELEASE or SAVE (see above) */
299 	struct aio_trans *next;	/* link to another aio_trans record */
300 } aio_trans;
301 
302 typedef struct aio_results {
303 	int max;		/* presumably cap on tracked requests; confirm */
304 	int outstanding;	/* presumably requests now in flight; confirm */
305 	int maxpend;		/* presumably peak pending count; confirm */
306 	aio_trans *trans;	/* pointer to aio_trans record(s) */
307 } aio_results;
308 
309 int aio_inited = 0;
310 aio_results results;
311 
312 /*
313  * Allow up to MAXBUF aio requests that each have a unique buffer.
314  * More aio's might be done, but not using memory through the getbuf()
315  * interface.  This can be raised, but you run into the potential of
316  * using more memory than is physically available on the machine,
317  * and if you start swapping, you can forget about performance.
318  * To prevent this, we also limit the total memory used for a given
319  * type of buffer to MAXBUFMEM.
320  *
321  * Tests indicate a cylinder group's worth of inodes takes:
322  *
323  *	NBPI	Size of Inode Buffer
324  *	 2k	1688k
325  *	 8k	 424k
326  *
327  * initcg() stores all the inodes for a cylinder group in one buffer,
328  * so allowing 20 buffers could take 32 MB if not limited by MAXBUFMEM.
329  */
330 #define	MAXBUF		20
331 #define	MAXBUFMEM	(8 * 1024 * 1024)
332 
333 /*
334  * header information for buffers managed by getbuf() and freebuf()
335  */
336 typedef struct bufhdr {
337 	struct bufhdr *head;	/* presumably the owning list head; confirm */
338 	struct bufhdr *next;	/* link to another bufhdr */
339 } bufhdr;
340 
341 int bufhdrsize;
342 
343 bufhdr inodebuf = { NULL, NULL };
344 bufhdr cgsumbuf = { NULL, NULL };
345 
346 #define	SECTORS_PER_TERABYTE	(1LL << 31)
347 /*
348  * The following constant specifies an upper limit for file system size
349  * that is actually a lot bigger than we expect to support with UFS. (Since
350  * it's specified in sectors, the file system size would be 2**44 * 512,
351  * which is 2**53, which is 8192 Terabytes.)  However, it's useful
352  * for checking the basic sanity of a size value that is input on the
353  * command line.
354  */
355 #define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
356 
357 /*
358  * Forward declarations
359  */
360 static char *getbuf(bufhdr *bufhead, int size);
361 static void freebuf(char *buf);
362 static void freetrans(aio_trans *transp);
363 static aio_trans *get_aiop();
364 static aio_trans *wait_for_write(int block);
365 static void initcg(int cylno);
366 static void fsinit();
367 static int makedir(struct direct *protodir, int entries);
368 static void iput(struct inode *ip);
369 static void rdfs(diskaddr_t bno, int size, char *bf);
370 static void wtfs(diskaddr_t bno, int size, char *bf);
371 static void awtfs(diskaddr_t bno, int size, char *bf, int release);
372 static void wtfs_breakup(diskaddr_t bno, int size, char *bf);
373 static int isblock(struct fs *fs, unsigned char *cp, int h);
374 static void clrblock(struct fs *fs, unsigned char *cp, int h);
375 static void setblock(struct fs *fs, unsigned char *cp, int h);
376 static void usage();
377 static void dump_fscmd(char *fsys, int fsi);
378 static uint64_t number(uint64_t d_value, char *param, int flags);
379 static int match(char *s);
380 static char checkopt(char *optim);
381 static char checkmtb(char *mtbarg);
382 static void range_check(long *varp, char *name, long minimum,
383     long maximum, long def_val, int user_supplied);
384 static void range_check_64(uint64_t *varp, char *name, uint64_t minimum,
385     uint64_t maximum, uint64_t def_val, int user_supplied);
386 static daddr32_t alloc(int size, int mode);
387 static diskaddr_t get_max_size(int fd);
388 static long get_max_track_size(int fd);
389 static void block_sigint(sigset_t *old_mask);
390 static void unblock_sigint(sigset_t *old_mask);
391 static void recover_from_sigint(int signum);
392 static int confirm_abort(void);
393 static int getline(FILE *fp, char *loc, int maxlen);
394 static void flush_writes(void);
395 static long compute_maxcpg(long, long, long, long, long);
396 static int in_64bit_mode(void);
397 static int validate_size(int fd, diskaddr_t size);
398 static void dump_sblock(void);
399 
399 union {
400 	struct fs fs;		/* accessed through the sblock macro */
401 	char pad[SBSIZE];	/* makes the union at least SBSIZE bytes */
402 } fsun;
404 #define	sblock	fsun.fs
405 
406 struct	csum *fscs;
407 
407 union cgun {
408 	struct cg cg;		/* accessed through the acg macro */
409 	char pad[MAXBSIZE];	/* makes the union at least MAXBSIZE bytes */
410 } cgun;
412 
413 #define	acg	cgun.cg
414 /*
415  * Size of screen in cols in which to fit output
416  */
417 #define	WIDTH	80
418 
419 struct dinode zino[MAXBSIZE / sizeof (struct dinode)];
420 
421 /*
422  * file descriptors used for rdfs(fsi) and wtfs(fso).
423  * Initialized to an illegal file descriptor number.
424  */
425 int	fsi = -1;
426 int	fso = -1;
427 
428 /*
429  * The BIG parameter is machine dependent.  It should be a longlong integer
430  * constant that can be used by the number parser to check the validity
431  * of numeric parameters.
432  */
433 
434 #define	BIG		0x7fffffffffffffffLL
435 
436 /* Used to indicate to number() that a bogus value should cause us to exit */
437 #define	NO_DEFAULT	LONG_MIN
438 
439 /*
440  * The *_flag variables are used to indicate that the user specified
441  * the values, rather than that we made them up ourselves.  We can
442  * complain about the user giving us bogus values.
443  */
444 
445 /* semi-constants */
446 long	sectorsize = DEV_BSIZE;		/* bytes/sector from param.h */
447 long	bbsize = BBSIZE;		/* boot block size */
448 long	sbsize = SBSIZE;		/* superblock size */
449 
450 /* parameters */
451 diskaddr_t	fssize_db;		/* file system size in disk blocks */
452 diskaddr_t	fssize_frag;		/* file system size in frags */
453 long	cpg;				/* cylinders/cylinder group */
454 int	cpg_flag = RC_DEFAULT;
455 long	rotdelay = -1;			/* rotational delay between blocks */
456 int	rotdelay_flag = RC_DEFAULT;
457 long	maxcontig;			/* max contiguous blocks to allocate */
458 int	maxcontig_flag = RC_DEFAULT;
459 long	nsect = DFLNSECT;		/* sectors per track */
460 int	nsect_flag = RC_DEFAULT;
461 long	ntrack = DFLNTRAK;		/* tracks per cylinder */
462 int	ntrack_flag = RC_DEFAULT;
463 long	bsize = DESBLKSIZE;		/* filesystem block size */
464 int	bsize_flag = RC_DEFAULT;
465 long	fragsize = DESFRAGSIZE; 	/* filesystem fragment size */
466 int	fragsize_flag = RC_DEFAULT;
467 long	minfree = MINFREE; 		/* fs_minfree */
468 int	minfree_flag = RC_DEFAULT;
469 long	rps = DEFHZ;			/* revolutions/second of drive */
470 int	rps_flag = RC_DEFAULT;
471 long	nbpi = NBPI;			/* number of bytes per inode */
472 int	nbpi_flag = RC_DEFAULT;
473 long	nrpos = NRPOS;			/* number of rotational positions */
474 int	nrpos_flag = RC_DEFAULT;
475 long	apc = 0;			/* alternate sectors per cylinder */
476 int	apc_flag = RC_DEFAULT;
477 char	opt = 't';			/* optimization style, `t' or `s' */
478 char	mtb = 'n';			/* multi-terabyte format, 'y' or 'n' */
479 
480 long	debug = 0;			/* enable debugging output */
481 
482 int	spc_flag = 0;			/* alternate sectors specified or */
483 					/* found */
484 
485 /* global state */
486 int	Nflag;		/* do not write to disk */
487 int	mflag;		/* return the command line used to create this FS */
488 int	rflag;		/* report the superblock in an easily-parsed form */
489 int	Rflag;		/* dump the superblock in binary */
490 char	*fsys;
491 time_t	mkfstime;
492 char	*string;
493 
494 /*
495  * logging support
496  */
497 int	ismdd;			/* true if device is a SVM device */
498 int	islog;			/* true if ufs or SVM logging is enabled */
499 int	islogok;		/* true if ufs/SVM log state is good */
500 
501 static int	isufslog;	/* true if ufs logging is enabled */
502 static int	waslog;		/* true when ufs logging disabled during grow */
503 
504 /*
505  * growfs defines, globals, and forward references
506  */
507 #define	NOTENOUGHSPACE 33
508 int		grow;
509 static int	Pflag;		/* probe to which size the fs can be grown */
510 int		ismounted;
511 char		*directory;
512 diskaddr_t	grow_fssize;
513 long		grow_fs_size;
514 long		grow_fs_ncg;
515 diskaddr_t		grow_fs_csaddr;
516 long		grow_fs_cssize;
517 int		grow_fs_clean;
518 struct csum	*grow_fscs;
519 diskaddr_t		grow_sifrag;
520 int		test;
521 int		testforce;
522 diskaddr_t		testfrags;
523 int		inlockexit;
524 int		isbad;
525 
526 void		lockexit(int);
527 void		randomgeneration(void);
528 void		checksummarysize(void);
529 void		checksblock(void);
530 void		growinit(char *);
531 void		checkdev(char *, char  *);
532 void		checkmount(struct mnttab *, char *);
533 struct dinode	*gdinode(ino_t);
534 int		csfraginrange(daddr32_t);
535 struct csfrag	*findcsfrag(daddr32_t, struct csfrag **);
536 void		checkindirect(ino_t, daddr32_t *, daddr32_t, int);
537 void		addcsfrag(ino_t, daddr32_t, struct csfrag **);
538 void		delcsfrag(daddr32_t, struct csfrag **);
539 void		checkdirect(ino_t, daddr32_t *, daddr32_t *, int);
540 void		findcsfragino(void);
541 void		fixindirect(daddr32_t, int);
542 void		fixdirect(caddr_t, daddr32_t, daddr32_t *, int);
543 void		fixcsfragino(void);
544 void		extendsummaryinfo(void);
545 int		notenoughspace(void);
546 void		unalloccsfragino(void);
547 void		unalloccsfragfree(void);
548 void		findcsfragfree(void);
549 void		copycsfragino(void);
550 void		rdcg(long);
551 void		wtcg(void);
552 void		flcg(void);
553 void		allocfrags(long, daddr32_t *, long *);
554 void		alloccsfragino(void);
555 void		alloccsfragfree(void);
556 void		freefrags(daddr32_t, long, long);
557 int		findfreerange(long *, long *);
558 void		resetallocinfo(void);
559 void		extendcg(long);
560 void		ulockfs(void);
561 void		wlockfs(void);
562 void		clockfs(void);
563 void		wtsb(void);
564 static int64_t	checkfragallocated(daddr32_t);
565 static struct csum 	*read_summaryinfo(struct fs *);
566 static diskaddr_t 	probe_summaryinfo();
567 
568 void
569 main(int argc, char *argv[])
570 {
571 	long i, mincpc, mincpg, ibpcl;
572 	long cylno, rpos, blk, j, warn = 0;
573 	long mincpgcnt, maxcpg;
574 	uint64_t used, bpcg, inospercg;
575 	long mapcramped, inodecramped;
576 	long postblsize, rotblsize, totalsbsize;
577 	FILE *mnttab;
578 	struct mnttab mntp;
579 	char *special;
580 	struct statvfs64 fs;
581 	struct dk_cinfo dkcinfo;
582 	char pbuf[sizeof (uint64_t) * 3 + 1];
583 	int width, plen;
584 	uint64_t num;
585 	int c, saverr;
586 	diskaddr_t max_fssize;
587 	long tmpmaxcontig = -1;
588 	struct sigaction sigact;
589 	uint64_t nbytes64;
590 	int remaining_cg;
591 	int do_dot = 0;
592 
593 	(void) setlocale(LC_ALL, "");
594 
595 #if !defined(TEXT_DOMAIN)
596 #define	TEXT_DOMAIN "SYS_TEST"
597 #endif
598 	(void) textdomain(TEXT_DOMAIN);
599 
600 	while ((c = getopt(argc, argv, "F:bmo:VPGM:T:t:")) != EOF) {
601 		switch (c) {
602 
603 		case 'F':
604 			string = optarg;
605 			if (strcmp(string, "ufs") != 0)
606 				usage();
607 			break;
608 
609 		case 'm':	/* return command line used to create this FS */
610 			mflag++;
611 			break;
612 
613 		case 'o':
614 			/*
615 			 * ufs specific options.
616 			 */
617 			string = optarg;
618 			while (*string != '\0') {
619 				if (match("nsect=")) {
620 					nsect = number(DFLNSECT, "nsect", 0);
621 					nsect_flag = RC_KEYWORD;
622 				} else if (match("ntrack=")) {
623 					ntrack = number(DFLNTRAK, "ntrack", 0);
624 					ntrack_flag = RC_KEYWORD;
625 				} else if (match("bsize=")) {
626 					bsize = number(DESBLKSIZE, "bsize", 0);
627 					bsize_flag = RC_KEYWORD;
628 				} else if (match("fragsize=")) {
629 					fragsize = number(DESFRAGSIZE,
630 					    "fragsize", 0);
631 					fragsize_flag = RC_KEYWORD;
632 				} else if (match("cgsize=")) {
633 					cpg = number(DESCPG, "cgsize", 0);
634 					cpg_flag = RC_KEYWORD;
635 				} else if (match("free=")) {
636 					minfree = number(MINFREE, "free",
637 					    ALLOW_PERCENT);
638 					minfree_flag = RC_KEYWORD;
639 				} else if (match("maxcontig=")) {
640 					tmpmaxcontig =
641 					    number(-1, "maxcontig", 0);
642 					maxcontig_flag = RC_KEYWORD;
643 				} else if (match("nrpos=")) {
644 					nrpos = number(NRPOS, "nrpos", 0);
645 					nrpos_flag = RC_KEYWORD;
646 				} else if (match("rps=")) {
647 					rps = number(DEFHZ, "rps", 0);
648 					rps_flag = RC_KEYWORD;
649 				} else if (match("nbpi=")) {
650 					nbpi = number(NBPI, "nbpi", 0);
651 					nbpi_flag = RC_KEYWORD;
652 				} else if (match("opt=")) {
653 					opt = checkopt(string);
654 				} else if (match("mtb=")) {
655 					mtb = checkmtb(string);
656 				} else if (match("apc=")) {
657 					apc = number(0, "apc", 0);
658 					apc_flag = RC_KEYWORD;
659 				} else if (match("gap=")) {
660 					(void) number(0, "gap", ALLOW_MS1);
661 					rotdelay = ROTDELAY;
662 					rotdelay_flag = RC_DEFAULT;
663 				} else if (match("debug=")) {
664 					debug = number(0, "debug", 0);
665 				} else if (match("N")) {
666 					Nflag++;
667 				} else if (match("calcsb")) {
668 					rflag++;
669 					Nflag++;
670 				} else if (match("calcbinsb")) {
671 					rflag++;
672 					Rflag++;
673 					Nflag++;
674 				} else if (*string == '\0') {
675 					break;
676 				} else {
677 					(void) fprintf(stderr, gettext(
678 						"illegal option: %s\n"),
679 						string);
680 					usage();
681 				}
682 
683 				if (*string == ',') string++;
684 				if (*string == ' ') string++;
685 			}
686 			break;
687 
688 		case 'V':
689 			{
690 				char	*opt_text;
691 				int	opt_count;
692 
693 				(void) fprintf(stdout, gettext("mkfs -F ufs "));
694 				for (opt_count = 1; opt_count < argc;
695 								opt_count++) {
696 					opt_text = argv[opt_count];
697 					if (opt_text)
698 					    (void) fprintf(stdout, " %s ",
699 								opt_text);
700 				}
701 				(void) fprintf(stdout, "\n");
702 			}
703 			break;
704 
705 		case 'b':	/* do nothing for this */
706 			break;
707 
708 		case 'M':	/* grow the mounted file system */
709 			directory = optarg;
710 
711 			/* FALLTHROUGH */
712 		case 'G':	/* grow the file system */
713 			grow = 1;
714 			break;
715 		case 'P':	/* probe the file system growing size 	*/
716 			Pflag = 1;
717 			grow = 1; /* probe mode implies fs growing	*/
718 			break;
719 		case 'T':	/* For testing */
720 			testforce = 1;
721 
722 			/* FALLTHROUGH */
723 		case 't':
724 			test = 1;
725 			string = optarg;
726 			testfrags = number(NO_DEFAULT, "testfrags", 0);
727 			break;
728 
729 		case '?':
730 			usage();
731 			break;
732 		}
733 	}
734 #ifdef MKFS_DEBUG
735 	/*
736 	 * Turning on MKFS_DEBUG causes mkfs to produce a filesystem
737 	 * that can be reproduced by setting the time to 0 and seeding
738 	 * the random number generator to a constant.
739 	 */
740 	mkfstime = 0;	/* reproducible results */
741 #else
742 	(void) time(&mkfstime);
743 #endif
744 
745 	if (optind >= (argc - 1)) {
746 		if (optind > (argc - 1)) {
747 			(void) fprintf(stderr,
748 			    gettext("special not specified\n"));
749 			usage();
750 		} else if (mflag == 0) {
751 			(void) fprintf(stderr,
752 			    gettext("size not specified\n"));
753 			usage();
754 		}
755 	}
756 	argc -= optind;
757 	argv = &argv[optind];
758 
759 	fsys = argv[0];
760 	fsi = open64(fsys, O_RDONLY);
761 	if (fsi < 0) {
762 		(void) fprintf(stderr, gettext("%s: cannot open\n"), fsys);
763 		lockexit(32);
764 	}
765 
766 	if (mflag) {
767 		dump_fscmd(fsys, fsi);
768 		lockexit(0);
769 	}
770 
771 	/*
772 	 * The task of setting all of the configuration parameters for a
773 	 * UFS file system is basically a matter of solving n equations
774 	 * in m variables.  Typically, m is greater than n, so there is
775 	 * usually more than one valid solution.  Since this is usually
776 	 * an under-constrained problem, it's not always obvious what the
777 	 * "best" configuration is.
778 	 *
779 	 * In general, the approach is to
780 	 * 1. Determine the values for the file system parameters
781 	 *    that are externally constrained and therefore not adjustable
782 	 *    by mkfs (such as the device's size and maxtransfer size).
783 	 * 2. Acquire the user's requested setting for all configuration
784 	 *    values that can be set on the command line.
785 	 * 3. Determine the final value of all configuration values, by
786 	 *    the following approach:
787 	 *	- set the file system block size (fs_bsize).  Although
788 	 *	  this could be regarded as an adjustable parameter, in
789 	 *	  fact, it's pretty much a constant.  At this time, it's
790 	 *	  generally set to 8k (with older hardware, it can
791 	 *	  sometimes make sense to set it to 4k, but those
792 	 *	  situations are pretty rare now).
793 	 *	- re-adjust the maximum file system size based on the
794 	 *	  value of the file system block size.  Since the
795 	 *	  frag size can't be any larger than a file system
796 	 *	  block, and the number of frags in the file system
797 	 *	  has to fit into 31 bits, the file system block size
798 	 *	  affects the maximum file system size.
799 	 *	- now that the real maximum file system is known, set the
800 	 *	  actual size of the file system to be created to
801 	 *	  MIN(requested size, maximum file system size).
802 	 *	- now validate, and if necessary, adjust the following
803 	 *	  values:
804 	 *		rotdelay
805 	 *		nsect
806 	 *		maxcontig
807 	 *		apc
808 	 *		frag_size
809 	 *		rps
810 	 *		minfree
811 	 *		nrpos
812 	 *		ntrack
813 	 *		nbpi
814 	 *	- calculate maxcpg (the maximum value of the cylinders-per-
815 	 *	  cylinder-group configuration parameters).  There are two
816 	 *	  algorithms for calculating maxcpg:  an old one, which is
817 	 *	  used for file systems of less than 1 terabyte, and a
818 	 *	  new one, implemented in the function compute_maxcpg(),
819 	 *	  which is used for file systems of greater than 1 TB.
820 	 *	  The difference between them is that compute_maxcpg()
821 	 *	  really tries to maximize the cpg value.  The old
822 	 *	  algorithm fails to take advantage of smaller frags and
823 	 *	  lower inode density when determining the maximum cpg,
824 	 *	  and thus comes up with much lower numbers in some
825 	 *	  configurations.  At some point, we might use the
826 	 *	  new algorithm for determining maxcpg for all file
827 	 *	  systems, but at this time, the changes implemented for
828 	 *	  multi-terabyte UFS are NOT being automatically applied
829 	 *	  to UFS file systems of less than a terabyte (in the
830 	 *	  interest of not changing existing UFS policy too much
831 	 *	  until the ramifications of the changes are well-understood
832 	 *	  and have been evaluated for their effects on performance.)
833 	 *	- check the current values of the configuration parameters
834 	 *	  against the various constraints imposed by UFS.  These
835 	 *	  include:
836 	 *		* There must be at least one inode in each
837 	 *		  cylinder group.
838 	 *		* The cylinder group overhead block, which
839 	 *		  contains the inode and frag bitmaps, must fit
840 	 *		  within one file system block.
841 	 *		* The space required for inode maps should
842 	 *		  occupy no more than a third of the cylinder
843 	 *		  group overhead block.
844 	 *		* The rotational position tables have to fit
845 	 *		  within the available space in the super block.
846 	 *	  Adjust the configuration values that can be adjusted
847 	 *	  so that these constraints are satisfied.  The
848 	 *	  configuration values that are adjustable are:
849 	 *		* frag size
850 	 *		* cylinders per group
851 	 *		* inode density (can be increased)
852 	 *		* number of rotational positions (the rotational
853 	 *		  position tables are eliminated altogether if
854 	 *		  there isn't enough room for them.)
855 	 * 4. Set the values for all the dependent configuration
856 	 *    values (those that aren't settable on the command
857 	 *    line and which are completely dependent on the
858 	 *    adjustable parameters).  This includes cpc (cycles
859 	 *    per cylinder), spc (sectors-per-cylinder), and many others.
860 	 */
861 
862 	max_fssize = get_max_size(fsi);
863 
864 	/*
865 	 * Get and check positional arguments, if any.
866 	 */
867 	switch (argc - 1) {
868 	default:
869 		usage();
870 		/*NOTREACHED*/
871 	case 15:
872 		mtb = checkmtb(argv[15]);
873 		/* FALLTHROUGH */
874 	case 14:
875 		string = argv[14];
876 		tmpmaxcontig = number(-1, "maxcontig", 0);
877 		maxcontig_flag = RC_POSITIONAL;
878 		/* FALLTHROUGH */
879 	case 13:
880 		string = argv[13];
881 		nrpos = number(NRPOS, "nrpos", 0);
882 		nrpos_flag = RC_POSITIONAL;
883 		/* FALLTHROUGH */
884 	case 12:
885 		string = argv[12];
886 		rotdelay = ROTDELAY;
887 		rotdelay_flag = RC_DEFAULT;
888 		/* FALLTHROUGH */
889 	case 11:
890 		string = argv[11];
891 		apc = number(0, "apc", 0);
892 		apc_flag = RC_POSITIONAL;
893 		/* FALLTHROUGH */
894 	case 10:
895 		opt = checkopt(argv[10]);
896 		/* FALLTHROUGH */
897 	case 9:
898 		string = argv[9];
899 		nbpi = number(NBPI, "nbpi", 0);
900 		nbpi_flag = RC_POSITIONAL;
901 		/* FALLTHROUGH */
902 	case 8:
903 		string = argv[8];
904 		rps = number(DEFHZ, "rps", 0);
905 		rps_flag = RC_POSITIONAL;
906 		/* FALLTHROUGH */
907 	case 7:
908 		string = argv[7];
909 		minfree = number(MINFREE, "free", ALLOW_PERCENT);
910 		minfree_flag = RC_POSITIONAL;
911 		/* FALLTHROUGH */
912 	case 6:
913 		string = argv[6];
914 		cpg = number(DESCPG, "cgsize", 0);
915 		cpg_flag = RC_POSITIONAL;
916 		/* FALLTHROUGH */
917 	case 5:
918 		string = argv[5];
919 		fragsize = number(DESFRAGSIZE, "fragsize", 0);
920 		fragsize_flag = RC_POSITIONAL;
921 		/* FALLTHROUGH */
922 	case 4:
923 		string = argv[4];
924 		bsize = number(DESBLKSIZE, "bsize", 0);
925 		bsize_flag = RC_POSITIONAL;
926 		/* FALLTHROUGH */
927 	case 3:
928 		string = argv[3];
929 		ntrack = number(DFLNTRAK, "ntrack", 0);
930 		ntrack_flag = RC_POSITIONAL;
931 		/* FALLTHROUGH */
932 	case 2:
933 		string = argv[2];
934 		nsect = number(DFLNSECT, "nsect", 0);
935 		nsect_flag = RC_POSITIONAL;
936 		/* FALLTHROUGH */
937 	case 1:
938 		string = argv[1];
939 		fssize_db = number(max_fssize, "size", 0);
940 	}
941 
942 
943 	if ((maxcontig_flag == RC_DEFAULT) || (tmpmaxcontig == -1) ||
944 		(maxcontig == -1)) {
945 		long maxtrax = get_max_track_size(fsi);
946 		maxcontig = maxtrax / bsize;
947 
948 	} else {
949 		maxcontig = tmpmaxcontig;
950 	}
951 
952 	if (rotdelay == -1) {	/* default by newfs and mkfs */
953 		rotdelay = ROTDELAY;
954 	}
955 
956 	if (cpg_flag == RC_DEFAULT) { /* If not explicity set, use default */
957 		cpg = DESCPG;
958 	}
959 
960 	/*
961 	 * Now that we have the semi-sane args, either positional, via -o,
962 	 * or by defaulting, handle inter-dependencies and range checks.
963 	 */
964 
965 	/*
966 	 * Settle the file system block size first, since it's a fixed
967 	 * parameter once set and so many other parameters, including
968 	 * max_fssize, depend on it.
969 	 */
970 	range_check(&bsize, "bsize", MINBSIZE, MAXBSIZE, DESBLKSIZE,
971 	    bsize_flag);
972 
973 	if (!POWEROF2(bsize)) {
974 		(void) fprintf(stderr,
975 		    gettext("block size must be a power of 2, not %ld\n"),
976 		    bsize);
977 		bsize = DESBLKSIZE;
978 		(void) fprintf(stderr,
979 		    gettext("mkfs: bsize reset to default %ld\n"),
980 		    bsize);
981 	}
982 
983 	if (fssize_db > max_fssize && validate_size(fsi, fssize_db)) {
984 		(void) fprintf(stderr, gettext(
985 		    "Warning: the requested size of this file system\n"
986 		    "(%lld sectors) is greater than the size of the\n"
987 		    "device reported by the driver (%lld sectors).\n"
988 		    "However, a read of the device at the requested size\n"
989 		    "does succeed, so the requested size will be used.\n"),
990 		    fssize_db, max_fssize);
991 		max_fssize = fssize_db;
992 	}
993 	/*
994 	 * Since the maximum allocatable unit (the frag) must be less than
995 	 * or equal to bsize, and the number of frags must be less than or
996 	 * equal to INT_MAX, the total size of the file system (in
997 	 * bytes) must be less than or equal to bsize * INT_MAX.
998 	 */
999 
1000 	if (max_fssize > ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX)
1001 		max_fssize = ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX;
1002 	range_check_64(&fssize_db, "size", 1024LL, max_fssize, max_fssize, 1);
1003 
1004 	if (fssize_db >= SECTORS_PER_TERABYTE) {
1005 		mtb = 'y';
1006 		if (!in_64bit_mode()) {
1007 			(void) fprintf(stderr, gettext(
1008 "mkfs:  Warning: Creating a file system greater than 1 terabyte on a\n"
1009 "       system running a 32-bit kernel.  This file system will not be\n"
1010 "       accessible until the system is rebooted with a 64-bit kernel.\n"));
1011 		}
1012 	}
1013 
1014 
1015 	/*
1016 	 * 32K based on max block size of 64K, and rotational layout
1017 	 * test of nsect <= (256 * sectors/block).  Current block size
1018 	 * limit is not 64K, but it's growing soon.
1019 	 */
1020 	range_check(&nsect, "nsect", 1, 32768, DFLNSECT, nsect_flag);
1021 	range_check(&apc, "apc", 0, nsect - 1, 0, apc_flag);
1022 
1023 	if (mtb == 'y')
1024 		fragsize = bsize;
1025 
1026 	range_check(&fragsize, "fragsize", sectorsize, bsize,
1027 	    MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize)), fragsize_flag);
1028 
1029 	if ((bsize / MAXFRAG) > fragsize) {
1030 		(void) fprintf(stderr, gettext(
1031 "fragment size %ld is too small, minimum with block size %ld is %ld\n"),
1032 		    fragsize, bsize, bsize / MAXFRAG);
1033 		(void) fprintf(stderr,
1034 		    gettext("mkfs: fragsize reset to minimum %ld\n"),
1035 		    bsize / MAXFRAG);
1036 		fragsize = bsize / MAXFRAG;
1037 	}
1038 
1039 	if (!POWEROF2(fragsize)) {
1040 		(void) fprintf(stderr,
1041 		    gettext("fragment size must be a power of 2, not %ld\n"),
1042 		    fragsize);
1043 		fragsize = MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize));
1044 		(void) fprintf(stderr,
1045 		    gettext("mkfs: fragsize reset to %ld\n"),
1046 		    fragsize);
1047 	}
1048 
1049 	/* At this point, bsize must be >= fragsize, so no need to check it */
1050 
1051 	if (bsize < PAGESIZE) {
1052 		(void) fprintf(stderr, gettext(
1053 		    "WARNING: filesystem block size (%ld) is smaller than "
1054 		    "memory page size (%ld).\nResulting filesystem can not be "
1055 		    "mounted on this system.\n\n"),
1056 		    bsize, (long)PAGESIZE);
1057 	}
1058 
1059 	range_check(&rps, "rps", 1, 1000, DEFHZ, rps_flag);
1060 	range_check(&minfree, "free", 0, 99, MINFREE, minfree_flag);
1061 	range_check(&nrpos, "nrpos", 1, nsect, MIN(nsect, NRPOS), nrpos_flag);
1062 
1063 	/*
1064 	 * ntrack is the number of tracks per cylinder.
1065 	 * The ntrack value must be between 1 and the total number of
1066 	 * sectors in the file system.
1067 	 */
1068 	range_check(&ntrack, "ntrack", 1,
1069 	    fssize_db > INT_MAX ? INT_MAX : (uint32_t)fssize_db,
1070 	    DFLNTRAK, ntrack_flag);
1071 
1072 	/*
1073 	 * nbpi is variable, but 2MB seems a reasonable upper limit,
1074 	 * as 4MB tends to cause problems (using otherwise-default
1075 	 * parameters).  The true limit is where we end up with one
1076 	 * inode per cylinder group.  If this file system is being
1077 	 * configured for multi-terabyte access, nbpi must be at least 1MB.
1078 	 */
1079 	if (mtb == 'y' && nbpi < MTB_NBPI) {
1080 		(void) fprintf(stderr, gettext("mkfs: bad value for nbpi: "
1081 			"must be at least 1048576 for multi-terabyte, "
1082 			"nbpi reset to default 1048576\n"));
1083 		nbpi = MTB_NBPI;
1084 	}
1085 
1086 	if (mtb == 'y')
1087 		range_check(&nbpi, "nbpi", MTB_NBPI, 2 * MB, MTB_NBPI,
1088 			nbpi_flag);
1089 	else
1090 		range_check(&nbpi, "nbpi", DEV_BSIZE, 2 * MB, NBPI, nbpi_flag);
1091 
1092 	/*
1093 	 * maxcpg is another variably-limited parameter.  Calculate
1094 	 * the limit based on what we've got for its dependent
1095 	 * variables.  Effectively, it's how much space is left in the
1096 	 * superblock after all the other bits are accounted for.  We
1097 	 * only fill in sblock fields so we can use MAXIpG.
1098 	 *
1099 	 * If the calculation of maxcpg below (for the mtb == 'n'
1100 	 * case) is changed, update newfs as well.
1101 	 *
1102 	 * For old-style, non-MTB format file systems, use the old
1103 	 * algorithm for calculating the maximum cylinder group size,
1104 	 * even though it limits the cylinder group more than necessary.
1105 	 * Since layout can affect performance, we don't want to change
1106 	 * the default layout for non-MTB file systems at this time.
1107 	 * However, for MTB file systems, use the new maxcpg calculation,
1108 	 * which really maxes out the cylinder group size.
1109 	 */
1110 
1111 	sblock.fs_bsize = bsize;
1112 	sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode);
1113 
1114 	if (mtb == 'n') {
1115 		maxcpg = (bsize - sizeof (struct cg) -
1116 		    howmany(MAXIpG(&sblock), NBBY)) /
1117 		    (sizeof (long) + nrpos * sizeof (short) +
1118 		    nsect / (MAXFRAG * NBBY));
1119 	} else {
1120 		maxcpg = compute_maxcpg(bsize, fragsize, nbpi, nrpos,
1121 		    nsect * ntrack);
1122 	}
1123 
1124 	if (cpg == -1)
1125 		cpg = maxcpg;
1126 	/*
1127 	 * mincpg is variable in complex ways, so we really can't
1128 	 * do a sane lower-end limit check at this point.
1129 	 */
1130 	range_check(&cpg, "cgsize", 1, maxcpg, MIN(maxcpg, DESCPG), cpg_flag);
1131 
1132 	/*
1133 	 * get the controller info
1134 	 */
1135 	ismdd = 0;
1136 	islog = 0;
1137 	islogok = 0;
1138 	waslog = 0;
1139 
1140 	if (ioctl(fsi, DKIOCINFO, &dkcinfo) == 0)
1141 		/*
1142 		 * if it is an MDD (disksuite) device
1143 		 */
1144 		if (dkcinfo.dki_ctype == DKC_MD) {
1145 			ismdd++;
1146 			/*
1147 			 * check the logging device
1148 			 */
1149 			if (ioctl(fsi, _FIOISLOG, NULL) == 0) {
1150 				islog++;
1151 				if (ioctl(fsi, _FIOISLOGOK, NULL) == 0)
1152 					islogok++;
1153 			}
1154 		}
1155 
1156 	/*
1157 	 * Do not grow the file system, but print on stdout the maximum
1158 	 * size in sectors to which the file system can be increased.
1159 	 * The calculated size is limited by fssize_db.
1160 	 * Note that we don't lock the filesystem and therefore under rare
1161 	 * conditions (the filesystem is mounted, the free block count is
1162 	 * almost zero, and the superuser is still changing it) the calculated
1163 	 * size can be imprecise.
1164 	 */
1165 	if (Pflag) {
1166 		(void) printf("%llu\n", probe_summaryinfo());
1167 		exit(0);
1168 	}
1169 
1170 	/*
1171 	 * If we're growing an existing filesystem, then we're about
1172 	 * to start doing things that can require recovery efforts if
1173 	 * we get interrupted, so make sure we get a chance to do so.
1174 	 */
1175 	if (grow) {
1176 		sigact.sa_handler = recover_from_sigint;
1177 		sigemptyset(&sigact.sa_mask);
1178 		sigact.sa_flags = SA_RESTART;
1179 
1180 		if (sigaction(SIGINT, &sigact, (struct sigaction *)NULL) < 0) {
1181 			perror(gettext("Could not register SIGINT handler"));
1182 			lockexit(3);
1183 		}
1184 	}
1185 
1186 	if (!Nflag) {
1187 		/*
1188 		 * Check if MNTTAB is trustable
1189 		 */
1190 		if (statvfs64(MNTTAB, &fs) < 0) {
1191 			(void) fprintf(stderr, gettext("can't statvfs %s\n"),
1192 				MNTTAB);
1193 			exit(32);
1194 		}
1195 
1196 		if (strcmp(MNTTYPE_MNTFS, fs.f_basetype) != 0) {
1197 			(void) fprintf(stderr, gettext(
1198 				"%s file system type is not %s, can't mkfs\n"),
1199 				MNTTAB, MNTTYPE_MNTFS);
1200 			exit(32);
1201 		}
1202 
1203 		special = getfullblkname(fsys);
1204 		checkdev(fsys, special);
1205 
1206 		/*
1207 		 * If we found the block device name,
1208 		 * then check the mount table.
1209 		 * if mounted, and growing write lock the file system
1210 		 *
1211 		 */
1212 		if ((special != NULL) && (*special != '\0')) {
1213 			if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
1214 				(void) fprintf(stderr, gettext(
1215 					"can't open %s\n"), MNTTAB);
1216 				exit(32);
1217 			}
1218 			while ((getmntent(mnttab, &mntp)) == NULL) {
1219 				if (grow) {
1220 					checkmount(&mntp, special);
1221 					continue;
1222 				}
1223 				if (strcmp(special, mntp.mnt_special) == 0) {
1224 					(void) fprintf(stderr, gettext(
1225 					    "%s is mounted, can't mkfs\n"),
1226 					    special);
1227 					exit(32);
1228 				}
1229 			}
1230 			(void) fclose(mnttab);
1231 		}
1232 
1233 		if (directory && (ismounted == 0)) {
1234 			(void) fprintf(stderr, gettext("%s is not mounted\n"),
1235 			    special);
1236 			lockexit(32);
1237 		}
1238 
1239 		fso = (grow) ? open64(fsys, O_WRONLY) : creat64(fsys, 0666);
1240 		if (fso < 0) {
1241 			saverr = errno;
1242 			(void) fprintf(stderr,
1243 			    gettext("%s: cannot create: %s\n"),
1244 			    fsys, strerror(saverr));
1245 			lockexit(32);
1246 		}
1247 
1248 	} else {
1249 
1250 		/*
1251 		 * For the -N case, a file descriptor is needed for the llseek()
1252 		 * in wtfs(). See the comment in wtfs() for more information.
1253 		 *
1254 		 * Get a file descriptor that's read-only so that this code
1255 		 * doesn't accidentally write to the file.
1256 		 */
1257 		fso = open64(fsys, O_RDONLY);
1258 		if (fso < 0) {
1259 			saverr = errno;
1260 			(void) fprintf(stderr, gettext("%s: cannot open: %s\n"),
1261 			    fsys, strerror(saverr));
1262 			lockexit(32);
1263 		}
1264 	}
1265 
1266 	/*
1267 	 * seed random # generator (for ic_generation)
1268 	 */
1269 #ifdef MKFS_DEBUG
1270 	srand48(12962);	/* reproducible results */
1271 #else
1272 	srand48((long)(time((time_t *)NULL) + getpid()));
1273 #endif
1274 
1275 	if (grow) {
1276 		growinit(fsys);
1277 		goto grow00;
1278 	}
1279 
1280 	/*
1281 	 * Validate the given file system size.
1282 	 * Verify that its last block can actually be accessed.
1283 	 *
1284 	 * Note: it's ok to use sblock as a buffer because it is immediately
1285 	 * overwritten by the rdfs() of the superblock in the next line.
1286 	 *
1287 	 * ToDo: Because the size checking is done in rdfs()/wtfs(), the
1288 	 * error message for specifying an illegal size is very unfriendly.
1289 	 * In the future, one could replace the rdfs()/wtfs() calls
1290 	 * below with in-line calls to read() or write(). This allows better
1291 	 * error messages to be put in place.
1292 	 */
1293 	rdfs(fssize_db - 1, (int)sectorsize, (char *)&sblock);
1294 
1295 	/*
1296 	 * make the fs unmountable
1297 	 */
1298 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
1299 	sblock.fs_magic = -1;
1300 	sblock.fs_clean = FSBAD;
1301 	sblock.fs_state = FSOKAY - sblock.fs_time;
1302 	wtfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
1303 	bzero(&sblock, (size_t)sbsize);
1304 
1305 	sblock.fs_nsect = nsect;
1306 	sblock.fs_ntrak = ntrack;
1307 
1308 	/*
1309 	 * Validate specified/determined spc
1310 	 * and calculate minimum cylinders per group.
1311 	 */
1312 
1313 	/*
1314 	 * sectors/cyl = tracks/cyl * sectors/track
1315 	 */
1316 	sblock.fs_spc = sblock.fs_ntrak * sblock.fs_nsect;
1317 
1318 grow00:
1319 	if (apc_flag) {
1320 		sblock.fs_spc -= apc;
1321 	}
1322 	/*
1323 	 * Have to test for this separately from apc_flag, due to
1324 	 * the growfs case....
1325 	 */
1326 	if (sblock.fs_spc != sblock.fs_ntrak * sblock.fs_nsect) {
1327 		spc_flag = 1;
1328 	}
1329 	if (grow)
1330 		goto grow10;
1331 
1332 	sblock.fs_nrpos = nrpos;
1333 	sblock.fs_bsize = bsize;
1334 	sblock.fs_fsize = fragsize;
1335 	sblock.fs_minfree = minfree;
1336 
1337 grow10:
1338 	if (nbpi < sblock.fs_fsize) {
1339 		(void) fprintf(stderr, gettext(
1340 		"warning: wasteful data byte allocation / inode (nbpi):\n"));
1341 		(void) fprintf(stderr, gettext(
1342 		    "%ld smaller than allocatable fragment size of %d\n"),
1343 		    nbpi, sblock.fs_fsize);
1344 	}
1345 	if (grow)
1346 		goto grow20;
1347 
1348 	if (opt == 's')
1349 		sblock.fs_optim = FS_OPTSPACE;
1350 	else
1351 		sblock.fs_optim = FS_OPTTIME;
1352 
1353 	sblock.fs_bmask = ~(sblock.fs_bsize - 1);
1354 	sblock.fs_fmask = ~(sblock.fs_fsize - 1);
1355 	/*
1356 	 * Planning now for future expansion.
1357 	 */
1358 #if defined(_BIG_ENDIAN)
1359 		sblock.fs_qbmask.val[0] = 0;
1360 		sblock.fs_qbmask.val[1] = ~sblock.fs_bmask;
1361 		sblock.fs_qfmask.val[0] = 0;
1362 		sblock.fs_qfmask.val[1] = ~sblock.fs_fmask;
1363 #endif
1364 #if defined(_LITTLE_ENDIAN)
1365 		sblock.fs_qbmask.val[0] = ~sblock.fs_bmask;
1366 		sblock.fs_qbmask.val[1] = 0;
1367 		sblock.fs_qfmask.val[0] = ~sblock.fs_fmask;
1368 		sblock.fs_qfmask.val[1] = 0;
1369 #endif
1370 	for (sblock.fs_bshift = 0, i = sblock.fs_bsize; i > 1; i >>= 1)
1371 		sblock.fs_bshift++;
1372 	for (sblock.fs_fshift = 0, i = sblock.fs_fsize; i > 1; i >>= 1)
1373 		sblock.fs_fshift++;
1374 	sblock.fs_frag = numfrags(&sblock, sblock.fs_bsize);
1375 	for (sblock.fs_fragshift = 0, i = sblock.fs_frag; i > 1; i >>= 1)
1376 		sblock.fs_fragshift++;
1377 	if (sblock.fs_frag > MAXFRAG) {
1378 		(void) fprintf(stderr, gettext(
1379 	"fragment size %d is too small, minimum with block size %d is %d\n"),
1380 		    sblock.fs_fsize, sblock.fs_bsize,
1381 		    sblock.fs_bsize / MAXFRAG);
1382 		lockexit(32);
1383 	}
1384 	sblock.fs_nindir = sblock.fs_bsize / sizeof (daddr32_t);
1385 	sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode);
1386 	sblock.fs_nspf = sblock.fs_fsize / sectorsize;
1387 	for (sblock.fs_fsbtodb = 0, i = NSPF(&sblock); i > 1; i >>= 1)
1388 		sblock.fs_fsbtodb++;
1389 
1390 	/*
1391 	 * Compute the super-block, cylinder group, and inode blocks.
1392 	 * Note that these "blkno" are really fragment addresses.
1393 	 * For example, on an 8K/1K (block/fragment) system, fs_sblkno is 16,
1394 	 * fs_cblkno is 24, and fs_iblkno is 32. This is why CGSIZE is so
1395 	 * important: only 1 FS block is allocated for the cg struct (fragment
1396 	 * numbers 24 through 31).
1397 	 */
1398 	sblock.fs_sblkno =
1399 	    roundup(howmany(bbsize + sbsize, sblock.fs_fsize), sblock.fs_frag);
1400 	sblock.fs_cblkno = (daddr32_t)(sblock.fs_sblkno +
1401 	    roundup(howmany(sbsize, sblock.fs_fsize), sblock.fs_frag));
1402 	sblock.fs_iblkno = sblock.fs_cblkno + sblock.fs_frag;
1403 
1404 	sblock.fs_cgoffset = roundup(
1405 	    howmany(sblock.fs_nsect, NSPF(&sblock)), sblock.fs_frag);
1406 	for (sblock.fs_cgmask = -1, i = sblock.fs_ntrak; i > 1; i >>= 1)
1407 		sblock.fs_cgmask <<= 1;
1408 	if (!POWEROF2(sblock.fs_ntrak))
1409 		sblock.fs_cgmask <<= 1;
1410 	/*
1411 	 * Validate specified/determined spc
1412 	 * and calculate minimum cylinders per group.
1413 	 */
1414 
1415 	for (sblock.fs_cpc = NSPB(&sblock), i = sblock.fs_spc;
1416 	    sblock.fs_cpc > 1 && (i & 1) == 0;
1417 	    sblock.fs_cpc >>= 1, i >>= 1)
1418 		/* void */;
1419 	mincpc = sblock.fs_cpc;
1420 
1421 	/* if these calculations are changed, check dump_fscmd also */
1422 	bpcg = (uint64_t)sblock.fs_spc * sectorsize;
1423 	inospercg = (uint64_t)roundup(bpcg / sizeof (struct dinode),
1424 	    INOPB(&sblock));
1425 	if (inospercg > MAXIpG(&sblock))
1426 		inospercg = MAXIpG(&sblock);
1427 	used = (uint64_t)(sblock.fs_iblkno + inospercg /
1428 	    INOPF(&sblock)) * NSPF(&sblock);
1429 	mincpgcnt = (long)howmany((uint64_t)sblock.fs_cgoffset *
1430 	    (~sblock.fs_cgmask) + used, sblock.fs_spc);
1431 	mincpg = roundup(mincpgcnt, mincpc);
1432 	/*
1433 	 * Ensure that cylinder group with mincpg has enough space
1434 	 * for block maps
1435 	 */
1436 	sblock.fs_cpg = mincpg;
1437 	sblock.fs_ipg = (int32_t)inospercg;
1438 	mapcramped = 0;
1439 
1440 	/*
1441 	 * Make sure the cg struct fits within the file system block.
1442 	 * Use larger block sizes until it fits
1443 	 */
1444 	while (CGSIZE(&sblock) > sblock.fs_bsize) {
1445 		mapcramped = 1;
1446 		if (sblock.fs_bsize < MAXBSIZE) {
1447 			sblock.fs_bsize <<= 1;
1448 			if ((i & 1) == 0) {
1449 				i >>= 1;
1450 			} else {
1451 				sblock.fs_cpc <<= 1;
1452 				mincpc <<= 1;
1453 				mincpg = roundup(mincpgcnt, mincpc);
1454 				sblock.fs_cpg = mincpg;
1455 			}
1456 			sblock.fs_frag <<= 1;
1457 			sblock.fs_fragshift += 1;
1458 			if (sblock.fs_frag <= MAXFRAG)
1459 				continue;
1460 		}
1461 
1462 		/*
1463 		 * Looped far enough. The fragment is now as large as the
1464 		 * filesystem block!
1465 		 */
1466 		if (sblock.fs_fsize == sblock.fs_bsize) {
1467 			(void) fprintf(stderr, gettext(
1468 		    "There is no block size that can support this disk\n"));
1469 			lockexit(32);
1470 		}
1471 
1472 		/*
1473 		 * Try a larger fragment. Double the fragment size.
1474 		 */
1475 		sblock.fs_frag >>= 1;
1476 		sblock.fs_fragshift -= 1;
1477 		sblock.fs_fsize <<= 1;
1478 		sblock.fs_nspf <<= 1;
1479 	}
1480 	/*
1481 	 * Ensure that cylinder group with mincpg has enough space for inodes
1482 	 */
1483 	inodecramped = 0;
1484 	used *= sectorsize;
1485 	nbytes64 = (uint64_t)mincpg * bpcg - used;
1486 	inospercg = (uint64_t)roundup((nbytes64 / nbpi), INOPB(&sblock));
1487 	sblock.fs_ipg = (int32_t)inospercg;
1488 	while (inospercg > MAXIpG(&sblock)) {
1489 		inodecramped = 1;
1490 		if (mincpc == 1 || sblock.fs_frag == 1 ||
1491 		    sblock.fs_bsize == MINBSIZE)
1492 			break;
1493 		nbytes64 = (uint64_t)mincpg * bpcg - used;
1494 		(void) fprintf(stderr,
1495 		    gettext("With a block size of %d %s %lu\n"),
1496 		    sblock.fs_bsize, gettext("minimum bytes per inode is"),
1497 		    (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1));
1498 		sblock.fs_bsize >>= 1;
1499 		sblock.fs_frag >>= 1;
1500 		sblock.fs_fragshift -= 1;
1501 		mincpc >>= 1;
1502 		sblock.fs_cpg = roundup(mincpgcnt, mincpc);
1503 		if (CGSIZE(&sblock) > sblock.fs_bsize) {
1504 			sblock.fs_bsize <<= 1;
1505 			break;
1506 		}
1507 		mincpg = sblock.fs_cpg;
1508 		nbytes64 = (uint64_t)mincpg * bpcg - used;
1509 		inospercg = (uint64_t)roundup((nbytes64 / nbpi),
1510 			INOPB(&sblock));
1511 		sblock.fs_ipg = (int32_t)inospercg;
1512 	}
1513 	if (inodecramped) {
1514 		if (inospercg > MAXIpG(&sblock)) {
1515 			nbytes64 = (uint64_t)mincpg * bpcg - used;
1516 			(void) fprintf(stderr, gettext(
1517 			    "Minimum bytes per inode is %d\n"),
1518 			    (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1));
1519 		} else if (!mapcramped) {
1520 			(void) fprintf(stderr, gettext(
1521 	    "With %ld bytes per inode, minimum cylinders per group is %ld\n"),
1522 			    nbpi, mincpg);
1523 		}
1524 	}
1525 	if (mapcramped) {
1526 		(void) fprintf(stderr, gettext(
1527 		    "With %d sectors per cylinder, minimum cylinders "
1528 		    "per group is %ld\n"),
1529 		    sblock.fs_spc, mincpg);
1530 	}
1531 	if (inodecramped || mapcramped) {
1532 		/*
1533 		 * To make this at least somewhat comprehensible in
1534 		 * the world of i18n, figure out what we're going to
1535 		 * say and then say it all at one time.  The days of
1536 		 * needing to scrimp on string space are behind us....
1537 		 */
1538 		if ((sblock.fs_bsize != bsize) &&
1539 		    (sblock.fs_fsize != fragsize)) {
1540 			(void) fprintf(stderr, gettext(
1541 	    "This requires the block size to be changed from %ld to %d\n"
1542 	    "and the fragment size to be changed from %ld to %d\n"),
1543 			    bsize, sblock.fs_bsize,
1544 			    fragsize, sblock.fs_fsize);
1545 		} else if (sblock.fs_bsize != bsize) {
1546 			(void) fprintf(stderr, gettext(
1547 	    "This requires the block size to be changed from %ld to %d\n"),
1548 			    bsize, sblock.fs_bsize);
1549 		} else if (sblock.fs_fsize != fragsize) {
1550 			(void) fprintf(stderr, gettext(
1551 	    "This requires the fragment size to be changed from %ld to %d\n"),
1552 			    fragsize, sblock.fs_fsize);
1553 		} else {
1554 			(void) fprintf(stderr, gettext(
1555 	    "Unable to make filesystem fit with the given constraints\n"));
1556 		}
1557 		(void) fprintf(stderr, gettext(
1558 		    "Please re-run mkfs with corrected parameters\n"));
1559 		lockexit(32);
1560 	}
1561 	/*
1562 	 * Calculate the number of cylinders per group
1563 	 */
1564 	sblock.fs_cpg = cpg;
1565 	if (sblock.fs_cpg % mincpc != 0) {
1566 		(void) fprintf(stderr, gettext(
1567 		    "Warning: cylinder groups must have a multiple "
1568 		    "of %ld cylinders with the given\n         parameters\n"),
1569 		    mincpc);
1570 		sblock.fs_cpg = roundup(sblock.fs_cpg, mincpc);
1571 		(void) fprintf(stderr, gettext("Rounded cgsize up to %d\n"),
1572 		    sblock.fs_cpg);
1573 	}
1574 	/*
1575 	 * Must ensure there is enough space for inodes
1576 	 */
1577 	/* if these calculations are changed, check dump_fscmd also */
1578 	nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1579 	sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi), INOPB(&sblock));
1580 
1581 	/*
1582 	 * Slim down cylinders per group, until the inodes can fit.
1583 	 */
1584 	while (sblock.fs_ipg > MAXIpG(&sblock)) {
1585 		inodecramped = 1;
1586 		sblock.fs_cpg -= mincpc;
1587 		nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1588 		sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi),
1589 			INOPB(&sblock));
1590 	}
1591 	/*
1592 	 * Must ensure there is enough space to hold block map.
1593 	 * Cut down on cylinders per group, until the cg struct fits in a
1594 	 * filesystem block.
1595 	 */
1596 	while (CGSIZE(&sblock) > sblock.fs_bsize) {
1597 		mapcramped = 1;
1598 		sblock.fs_cpg -= mincpc;
1599 		nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1600 		sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi),
1601 			INOPB(&sblock));
1602 	}
1603 	sblock.fs_fpg = (sblock.fs_cpg * sblock.fs_spc) / NSPF(&sblock);
1604 	if ((sblock.fs_cpg * sblock.fs_spc) % NSPB(&sblock) != 0) {
1605 		(void) fprintf(stderr,
1606 		gettext("newfs: panic (fs_cpg * fs_spc) %% NSPF != 0\n"));
1607 		lockexit(32);
1608 	}
1609 	if (sblock.fs_cpg < mincpg) {
1610 		(void) fprintf(stderr, gettext(
1611 "With the given parameters, cgsize must be at least %ld; please re-run mkfs\n"),
1612 			mincpg);
1613 		lockexit(32);
1614 	}
1615 	sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock));
1616 grow20:
1617 	/*
1618 	 * Now have size for file system and nsect and ntrak.
1619 	 * Determine number of cylinders and blocks in the file system.
1620 	 */
1621 	fssize_frag = (int64_t)dbtofsb(&sblock, fssize_db);
1622 	if (fssize_frag > INT_MAX) {
1623 		(void) fprintf(stderr, gettext(
1624 "There are too many fragments in the system, increase fragment size\n"),
1625 		    mincpg);
1626 		lockexit(32);
1627 	}
1628 	sblock.fs_size = (int32_t)fssize_frag;
1629 	sblock.fs_ncyl = (int32_t)(fssize_frag * NSPF(&sblock) / sblock.fs_spc);
1630 	if (fssize_frag * NSPF(&sblock) >
1631 	    (uint64_t)sblock.fs_ncyl * sblock.fs_spc) {
1632 		sblock.fs_ncyl++;
1633 		warn = 1;
1634 	}
1635 	if (sblock.fs_ncyl < 1) {
1636 		(void) fprintf(stderr, gettext(
1637 			"file systems must have at least one cylinder\n"));
1638 		lockexit(32);
1639 	}
1640 	if (grow)
1641 		goto grow30;
1642 	/*
1643 	 * Determine feasibility/values of rotational layout tables.
1644 	 *
1645 	 * The size of the rotational layout tables is limited by the size
1646 	 * of the file system block, fs_bsize.  The amount of space
1647 	 * available for tables is calculated as (fs_bsize - sizeof (struct
1648 	 * fs)).  The size of these tables is inversely proportional to the
1649 	 * block size of the file system. The size increases if sectors per
1650 	 * track are not powers of two, because more cylinders must be
1651 	 * described by the tables before the rotational pattern repeats
1652 	 * (fs_cpc).
1653 	 */
1654 	sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT;
1655 	sblock.fs_sbsize = fragroundup(&sblock, sizeof (struct fs));
1656 	sblock.fs_npsect = sblock.fs_nsect;
1657 	if (sblock.fs_ntrak == 1) {
1658 		sblock.fs_cpc = 0;
1659 		goto next;
1660 	}
1661 	postblsize = sblock.fs_nrpos * sblock.fs_cpc * sizeof (short);
1662 	rotblsize = sblock.fs_cpc * sblock.fs_spc / NSPB(&sblock);
1663 	totalsbsize = sizeof (struct fs) + rotblsize;
1664 
1665 	/* do static allocation if nrpos == 8 and fs_cpc <= 16 */
1666 	if (sblock.fs_nrpos == 8 && sblock.fs_cpc <= 16) {
1667 		/* use old static table space */
1668 		sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) -
1669 		    (char *)(&sblock.fs_link);
1670 		sblock.fs_rotbloff = &sblock.fs_space[0] -
1671 		    (uchar_t *)(&sblock.fs_link);
1672 	} else {
1673 		/* use 4.3 dynamic table space */
1674 		sblock.fs_postbloff = &sblock.fs_space[0] -
1675 		    (uchar_t *)(&sblock.fs_link);
1676 		sblock.fs_rotbloff = sblock.fs_postbloff + postblsize;
1677 		totalsbsize += postblsize;
1678 	}
1679 	if (totalsbsize > sblock.fs_bsize ||
1680 	    sblock.fs_nsect > (1 << NBBY) * NSPB(&sblock)) {
1681 		(void) fprintf(stderr, gettext(
1682 		    "Warning: insufficient space in super block for\n"
1683 		    "rotational layout tables with nsect %d, ntrack %d, "
1684 		    "and nrpos %d.\nOmitting tables - file system "
1685 		    "performance may be impaired.\n"),
1686 		    sblock.fs_nsect, sblock.fs_ntrak, sblock.fs_nrpos);
1687 
1688 		/*
1689 		 * Setting fs_cpc to 0 tells alloccgblk() in ufs_alloc.c to
1690 		 * ignore the positional layout table and rotational
1691 		 * position table.
1692 		 */
1693 		sblock.fs_cpc = 0;
1694 		goto next;
1695 	}
1696 	sblock.fs_sbsize = fragroundup(&sblock, totalsbsize);
1697 
1698 
1699 	/*
1700 	 * calculate the available blocks for each rotational position
1701 	 */
1702 	for (cylno = 0; cylno < sblock.fs_cpc; cylno++)
1703 		for (rpos = 0; rpos < sblock.fs_nrpos; rpos++)
1704 			fs_postbl(&sblock, cylno)[rpos] = -1;
1705 	for (i = (rotblsize - 1) * sblock.fs_frag;
1706 	    i >= 0; i -= sblock.fs_frag) {
1707 		cylno = cbtocylno(&sblock, i);
1708 		rpos = cbtorpos(&sblock, i);
1709 		blk = fragstoblks(&sblock, i);
1710 		if (fs_postbl(&sblock, cylno)[rpos] == -1)
1711 			fs_rotbl(&sblock)[blk] = 0;
1712 		else
1713 			fs_rotbl(&sblock)[blk] =
1714 			    fs_postbl(&sblock, cylno)[rpos] - blk;
1715 		fs_postbl(&sblock, cylno)[rpos] = blk;
1716 	}
1717 next:
1718 grow30:
1719 	/*
1720 	 * Compute/validate number of cylinder groups.
1721 	 * Note that if an excessively large filesystem is specified
1722 	 * (e.g., more than 16384 cylinders for an 8K filesystem block), it
1723 	 * does not get detected until checksummarysize()
1724 	 */
1725 	sblock.fs_ncg = sblock.fs_ncyl / sblock.fs_cpg;
1726 	if (sblock.fs_ncyl % sblock.fs_cpg)
1727 		sblock.fs_ncg++;
1728 	sblock.fs_dblkno = sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock);
1729 	i = MIN(~sblock.fs_cgmask, sblock.fs_ncg - 1);
1730 	ibpcl = cgdmin(&sblock, i) - cgbase(&sblock, i);
1731 	if (ibpcl >= sblock.fs_fpg) {
1732 		(void) fprintf(stderr, gettext(
1733 		    "inode blocks/cyl group (%d) >= data blocks (%d)\n"),
1734 		    cgdmin(&sblock, i) - cgbase(&sblock, i) / sblock.fs_frag,
1735 		    sblock.fs_fpg / sblock.fs_frag);
1736 		if ((ibpcl < 0) || (sblock.fs_fpg < 0)) {
1737 			(void) fprintf(stderr, gettext(
1738 	    "number of cylinders per cylinder group (%d) must be decreased.\n"),
1739 			    sblock.fs_cpg);
1740 		} else {
1741 			(void) fprintf(stderr, gettext(
1742 	    "number of cylinders per cylinder group (%d) must be increased.\n"),
1743 			    sblock.fs_cpg);
1744 		}
1745 		(void) fprintf(stderr, gettext(
1746 "Note that cgsize may have been adjusted to allow struct cg to fit.\n"));
1747 		lockexit(32);
1748 	}
1749 	j = sblock.fs_ncg - 1;
1750 	if ((i = fssize_frag - j * sblock.fs_fpg) < sblock.fs_fpg &&
1751 	    cgdmin(&sblock, j) - cgbase(&sblock, j) > i) {
1752 		(void) fprintf(stderr, gettext(
1753 		    "Warning: inode blocks/cyl group (%d) >= data "
1754 		    "blocks (%ld) in last\n    cylinder group. This "
1755 		    "implies %ld sector(s) cannot be allocated.\n"),
1756 		    (cgdmin(&sblock, j) - cgbase(&sblock, j)) / sblock.fs_frag,
1757 		    i / sblock.fs_frag, i * NSPF(&sblock));
1758 		sblock.fs_ncg--;
1759 		sblock.fs_ncyl -= sblock.fs_ncyl % sblock.fs_cpg;
1760 		sblock.fs_size = fssize_frag =
1761 		    (int64_t)sblock.fs_ncyl * (int64_t)sblock.fs_spc /
1762 		    (int64_t)NSPF(&sblock);
1763 		warn = 0;
1764 	}
1765 	if (warn && !spc_flag) {
1766 		(void) fprintf(stderr, gettext(
1767 		    "Warning: %d sector(s) in last cylinder unallocated\n"),
1768 		    sblock.fs_spc - (uint32_t)(fssize_frag * NSPF(&sblock) -
1769 		    (uint64_t)(sblock.fs_ncyl - 1) * sblock.fs_spc));
1770 	}
1771 	/*
1772 	 * fill in remaining fields of the super block
1773 	 */
1774 
1775 	/*
1776 	 * The csum records are stored in cylinder group 0, starting at
1777 	 * cgdmin, the first data block.
1778 	 */
1779 	sblock.fs_csaddr = cgdmin(&sblock, 0);
1780 	sblock.fs_cssize =
1781 	    fragroundup(&sblock, sblock.fs_ncg * sizeof (struct csum));
1782 	i = sblock.fs_bsize / sizeof (struct csum);
1783 	sblock.fs_csmask = ~(i - 1);
1784 	for (sblock.fs_csshift = 0; i > 1; i >>= 1)
1785 		sblock.fs_csshift++;
1786 	fscs = (struct csum *)calloc(1, sblock.fs_cssize);
1787 
1788 	checksummarysize();
1789 	if (mtb == 'y') {
1790 		sblock.fs_magic = MTB_UFS_MAGIC;
1791 		sblock.fs_version = MTB_UFS_VERSION_1;
1792 	} else {
1793 		sblock.fs_magic = FS_MAGIC;
1794 	}
1795 
1796 	if (grow) {
1797 		bcopy((caddr_t)grow_fscs, (caddr_t)fscs, (int)grow_fs_cssize);
1798 		extendsummaryinfo();
1799 		goto grow40;
1800 	}
1801 	sblock.fs_rotdelay = rotdelay;
1802 	sblock.fs_maxcontig = maxcontig;
1803 	sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize);
1804 
1805 	sblock.fs_rps = rps;
1806 	sblock.fs_cgrotor = 0;
1807 	sblock.fs_cstotal.cs_ndir = 0;
1808 	sblock.fs_cstotal.cs_nbfree = 0;
1809 	sblock.fs_cstotal.cs_nifree = 0;
1810 	sblock.fs_cstotal.cs_nffree = 0;
1811 	sblock.fs_fmod = 0;
1812 	sblock.fs_ronly = 0;
1813 	sblock.fs_time = mkfstime;
1814 	sblock.fs_state = FSOKAY - sblock.fs_time;
1815 	sblock.fs_clean = FSCLEAN;
1816 grow40:
1817 
1818 	/*
1819 	 * If all that's needed is a dump of the superblock we
1820 	 * would use by default, we've got it now.  So, splat it
1821 	 * out and leave.
1822 	 */
1823 	if (rflag) {
1824 		dump_sblock();
1825 		lockexit(0);
1826 	}
1827 	/*
1828 	 * Dump out summary information about file system.
1829 	 */
1830 	(void) fprintf(stderr, gettext(
1831 	    "%s:\t%lld sectors in %d cylinders of %d tracks, %d sectors\n"),
1832 	    fsys, (uint64_t)sblock.fs_size * NSPF(&sblock), sblock.fs_ncyl,
1833 	    sblock.fs_ntrak, sblock.fs_nsect);
1834 	(void) fprintf(stderr, gettext(
1835 	    "\t%.1fMB in %d cyl groups (%d c/g, %.2fMB/g, %d i/g)\n"),
1836 	    (float)sblock.fs_size * sblock.fs_fsize / MB, sblock.fs_ncg,
1837 	    sblock.fs_cpg, (float)sblock.fs_fpg * sblock.fs_fsize / MB,
1838 	    sblock.fs_ipg);
1839 	/*
1840 	 * Now build the cylinders group blocks and
1841 	 * then print out indices of cylinder groups.
1842 	 */
1843 	(void) fprintf(stderr, gettext(
1844 	    "super-block backups (for fsck -F ufs -o b=#) at:\n"));
1845 	for (width = cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++) {
1846 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1847 			initcg(cylno);
1848 		num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno));
1849 		(void) sprintf(pbuf, " %llu,", num);
1850 		plen = strlen(pbuf);
1851 		if ((width + plen) > (WIDTH - 1)) {
1852 			width = plen;
1853 			(void) fprintf(stderr, "\n");
1854 		} else {
1855 			width += plen;
1856 		}
1857 		(void) fprintf(stderr, "%s", pbuf);
1858 	}
1859 	(void) fprintf(stderr, "\n");
1860 
1861 	remaining_cg = sblock.fs_ncg - cylno;
1862 
1863 	/*
1864 	 * If there are more than 300 cylinder groups still to be
1865 	 * initialized, print a "." for every 50 cylinder groups.
1866 	 */
1867 	if (remaining_cg > 300) {
1868 		(void) fprintf(stderr, gettext(
1869 		    "Initializing cylinder groups:\n"));
1870 		do_dot = 1;
1871 	}
1872 
1873 	/*
1874 	 * Now initialize all cylinder groups between the first ten
1875 	 * and the last ten.
1876 	 *
1877 	 * If the number of cylinder groups was less than 10, all of the
1878 	 * cylinder group offsets would have printed in the last loop
1879 	 * and cylno will already be equal to sblock.fs_ncg and so this
1880 	 * loop will not be entered.  If there are less than 20 cylinder
1881 	 * groups, cylno is already less than fs_ncg - 10, so this loop
1882 	 * won't be entered in that case either.
1883 	 */
1884 
1885 	i = 0;
1886 	for (; cylno < sblock.fs_ncg - 10; cylno++) {
1887 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1888 			initcg(cylno);
1889 		if (do_dot && cylno % 50 == 0) {
1890 			(void) fprintf(stderr, ".");
1891 			i++;
1892 			if (i == WIDTH - 1) {
1893 				(void) fprintf(stderr, "\n");
1894 				i = 0;
1895 			}
1896 		}
1897 	}
1898 
1899 	/*
1900 	 * Now print the cylinder group offsets for the last 10
1901 	 * cylinder groups, if any are left.
1902 	 */
1903 
1904 	if (do_dot) {
1905 		(void) fprintf(stderr, gettext(
1906 	    "\nsuper-block backups for last 10 cylinder groups at:\n"));
1907 	}
1908 	for (width = 0; cylno < sblock.fs_ncg; cylno++) {
1909 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1910 			initcg(cylno);
1911 		num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno));
1912 		(void) sprintf(pbuf, " %llu,", num);
1913 		plen = strlen(pbuf);
1914 		if ((width + plen) > (WIDTH - 1)) {
1915 			width = plen;
1916 			(void) fprintf(stderr, "\n");
1917 		} else {
1918 			width += plen;
1919 		}
1920 		(void) fprintf(stderr, "%s", pbuf);
1921 	}
1922 	(void) fprintf(stderr, "\n");
1923 	if (Nflag)
1924 		lockexit(0);
1925 	if (grow)
1926 		goto grow50;
1927 
1928 	/*
1929 	 * Now construct the initial file system,
1930 	 * then write out the super-block.
1931 	 */
1932 	fsinit();
1933 grow50:
1934 	/*
1935 	 * write the superblock and csum information
1936 	 */
1937 	wtsb();
1938 
1939 	/*
1940 	 * extend the last cylinder group in the original file system
1941 	 */
1942 	if (grow) {
1943 		extendcg(grow_fs_ncg-1);
1944 		wtsb();
1945 	}
1946 
1947 	/*
1948 	 * Write out the duplicate super blocks to the first 10
1949 	 * cylinder groups (or fewer, if there are fewer than 10
1950 	 * cylinder groups).
1951 	 */
1952 	for (cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++)
1953 		awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)),
1954 		    (int)sbsize, (char *)&sblock, SAVE);
1955 
1956 	/*
1957 	 * Now write out duplicate super blocks to the remaining
1958 	 * cylinder groups.  In the case of multi-terabyte file
1959 	 * systems, just write out the super block to the last ten
1960 	 * cylinder groups (or however many are left).
1961 	 */
1962 	if (mtb == 'y') {
1963 		if (sblock.fs_ncg <= 10)
1964 			cylno = sblock.fs_ncg;
1965 		else if (sblock.fs_ncg <= 20)
1966 			cylno = 10;
1967 		else
1968 			cylno = sblock.fs_ncg - 10;
1969 	}
1970 
1971 	for (; cylno < sblock.fs_ncg; cylno++)
1972 		awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)),
1973 		    (int)sbsize, (char *)&sblock, SAVE);
1974 
1975 	/*
1976 	 * Flush out all the AIO writes we've done.  It's not
1977 	 * necessary to do this explicitly, but it's the only
1978 	 * way to report any errors from those writes.
1979 	 */
1980 	flush_writes();
1981 
1982 	/*
1983 	 * set clean flag
1984 	 */
1985 	if (grow)
1986 		sblock.fs_clean = grow_fs_clean;
1987 	else
1988 		sblock.fs_clean = FSCLEAN;
1989 	sblock.fs_time = mkfstime;
1990 	sblock.fs_state = FSOKAY - sblock.fs_time;
1991 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
1992 	isbad = 0;
1993 
1994 	if (fsync(fso) == -1) {
1995 		saverr = errno;
1996 		(void) fprintf(stderr,
1997 		    gettext("mkfs: fsync failed on write disk: %s\n"),
1998 		    strerror(saverr));
1999 		/* we're just cleaning up, so keep going */
2000 	}
2001 	if (close(fsi) == -1) {
2002 		saverr = errno;
2003 		(void) fprintf(stderr,
2004 		    gettext("mkfs: close failed on read disk: %s\n"),
2005 		    strerror(saverr));
2006 		/* we're just cleaning up, so keep going */
2007 	}
2008 	if (close(fso) == -1) {
2009 		saverr = errno;
2010 		(void) fprintf(stderr,
2011 		    gettext("mkfs: close failed on write disk: %s\n"),
2012 		    strerror(saverr));
2013 		/* we're just cleaning up, so keep going */
2014 	}
2015 	fsi = fso = -1;
2016 
2017 #ifndef STANDALONE
2018 	lockexit(0);
2019 #endif
2020 }
2021 
2022 /*
2023  * Figure out how big the partition we're dealing with is.
2024  * The value returned is in disk blocks (sectors);
2025  */
2026 static diskaddr_t
2027 get_max_size(int fd)
2028 {
2029 	struct vtoc vtoc;
2030 	dk_gpt_t *efi_vtoc;
2031 	int	is_efi = 0;
2032 	diskaddr_t	slicesize;
2033 
2034 	int index = read_vtoc(fd, &vtoc);
2035 
2036 	if (index < 0) {
2037 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2038 			/* it might be an EFI label */
2039 			is_efi = 1;
2040 			index = efi_alloc_and_read(fd, &efi_vtoc);
2041 		}
2042 	}
2043 
2044 	if (index < 0) {
2045 		switch (index) {
2046 		case VT_ERROR:
2047 			break;
2048 		case VT_EIO:
2049 			errno = EIO;
2050 			break;
2051 		case VT_EINVAL:
2052 			errno = EINVAL;
2053 		}
2054 		perror(gettext("Can not determine partition size"));
2055 		lockexit(32);
2056 	}
2057 
2058 	if (is_efi) {
2059 		slicesize = efi_vtoc->efi_parts[index].p_size;
2060 		efi_free(efi_vtoc);
2061 	} else {
2062 		/*
2063 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
2064 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
2065 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
2066 		 * psize to an unsigned 32-bit quantity, it will be copied
2067 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
2068 		 * sign extension.
2069 		 */
2070 
2071 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
2072 	}
2073 
2074 	if (debug) {
2075 		(void) fprintf(stderr,
2076 		    "get_max_size: index = %d, p_size = %lld, dolimit = %d\n",
2077 		    index, slicesize, (slicesize > FS_MAX));
2078 	}
2079 
2080 	/*
2081 	 * The next line limits a UFS file system to the maximum
2082 	 * supported size.
2083 	 */
2084 
2085 	if (slicesize > FS_MAX)
2086 		return (FS_MAX);
2087 	return (slicesize);
2088 }
2089 
2090 static long
2091 get_max_track_size(int fd)
2092 {
2093 	struct dk_cinfo ci;
2094 	long track_size = -1;
2095 
2096 	if (ioctl(fd, DKIOCINFO, &ci) == 0) {
2097 		track_size = ci.dki_maxtransfer * DEV_BSIZE;
2098 	}
2099 
2100 	if ((track_size < 0)) {
2101 		int	error = 0;
2102 		int	maxphys;
2103 		int	gotit = 0;
2104 
2105 		gotit = fsgetmaxphys(&maxphys, &error);
2106 		if (gotit) {
2107 			track_size = MIN(MB, maxphys);
2108 		} else {
2109 			(void) fprintf(stderr, gettext(
2110 "Warning: Could not get system value for maxphys. The value for\n"
2111 "maxcontig will default to 1MB.\n"));
2112 			track_size = MB;
2113 		}
2114 	}
2115 	return (track_size);
2116 }
2117 
2118 /*
2119  * Initialize a cylinder group.
 *
 * Builds the cylinder group summary block for group `cylno' in a buffer
 * obtained from getbuf(), queues a block of randomized inodes for the
 * group, marks the group's data fragments as free in the free map, and
 * finally queues the summary block itself.  Both writes are issued
 * through awtfs() with RELEASE.
 *
 * Side effects: this group's counts are added to sblock.fs_cstotal,
 * its data space is added to sblock.fs_dsize, and its csum is stored
 * in fscs[cylno].
2120  */
2121 static void
2122 initcg(int cylno)
2123 {
2124 	diskaddr_t cbase, d;
2125 	diskaddr_t dlower;	/* last data block before cg metadata */
2126 	diskaddr_t dupper;	/* first data block after cg metadata */
2127 	diskaddr_t dmax;
2128 	int64_t i;
2129 	struct csum *cs;
2130 	struct dinode *inode_buffer;
2131 	int size;
2132 
2133 	/*
2134 	 * Variables used to store intermediate results as a part of
2135 	 * the internal implementation of the cbtocylno() macros.
2136 	 */
2137 	diskaddr_t bno;		/* UFS block number (not sector number) */
2138 	int	cbcylno;	/* current cylinder number */
2139 	int	cbcylno_sect;	/* sector offset within cylinder */
2140 	int	cbsect_incr;	/* amount to increment sector offset */
2141 
2142 	/*
2143 	 * Variables used to store intermediate results as a part of
2144 	 * the internal implementation of the cbtorpos() macros.
2145 	 */
2146 	short	*cgblks;	/* pointer to array of free blocks in cg */
2147 	int	trackrpos;	/* tmp variable for rotation position */
2148 	int	trackoff;	/* offset within a track */
2149 	int	trackoff_incr;	/* amount to increment trackoff */
2150 	int	rpos;		/* rotation position of current block */
2151 	int	rpos_incr;	/* amount to increment rpos per block */
2152 
2153 	union cgun *icgun;	/* local pointer to a cg summary block */
2154 #define	icg	(icgun->cg)
2155 
2156 	icgun = (union cgun *)getbuf(&cgsumbuf, sizeof (union cgun));
2157 
2158 	/*
2159 	 * Determine block bounds for cylinder group.
2160 	 * Allow space for super block summary information in first
2161 	 * cylinder group.
2162 	 */
2163 	cbase = cgbase(&sblock, cylno);
2164 	dmax = cbase + sblock.fs_fpg;
2165 	if (dmax > sblock.fs_size)	/* last cg may be smaller than normal */
2166 		dmax = sblock.fs_size;
2167 	dlower = cgsblock(&sblock, cylno) - cbase;
2168 	dupper = cgdmin(&sblock, cylno) - cbase;
2169 	if (cylno == 0)
2170 		dupper += howmany(sblock.fs_cssize, sblock.fs_fsize);
2171 	cs = fscs + cylno;
2172 	icg.cg_time = mkfstime;
2173 	icg.cg_magic = CG_MAGIC;
2174 	icg.cg_cgx = cylno;
2175 	/* last one gets whatever's left */
2176 	if (cylno == sblock.fs_ncg - 1)
2177 		icg.cg_ncyl = sblock.fs_ncyl - (sblock.fs_cpg * cylno);
2178 	else
2179 		icg.cg_ncyl = sblock.fs_cpg;
2180 	icg.cg_niblk = sblock.fs_ipg;
2181 	icg.cg_ndblk = dmax - cbase;
2182 	icg.cg_cs.cs_ndir = 0;
2183 	icg.cg_cs.cs_nffree = 0;
2184 	icg.cg_cs.cs_nbfree = 0;
2185 	icg.cg_cs.cs_nifree = 0;
2186 	icg.cg_rotor = 0;
2187 	icg.cg_frotor = 0;
2188 	icg.cg_irotor = 0;
	/*
	 * Lay out the variable-length tables that follow the fixed part
	 * of the cg structure; each offset starts where the previous
	 * table ends.
	 */
2189 	icg.cg_btotoff = &icg.cg_space[0] - (uchar_t *)(&icg.cg_link);
2190 	icg.cg_boff = icg.cg_btotoff + sblock.fs_cpg * sizeof (long);
2191 	icg.cg_iusedoff = icg.cg_boff +
2192 		sblock.fs_cpg * sblock.fs_nrpos * sizeof (short);
2193 	icg.cg_freeoff = icg.cg_iusedoff + howmany(sblock.fs_ipg, NBBY);
2194 	icg.cg_nextfreeoff = icg.cg_freeoff +
2195 		howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
2196 	for (i = 0; i < sblock.fs_frag; i++) {
2197 		icg.cg_frsum[i] = 0;
2198 	}
2199 	bzero((caddr_t)cg_inosused(&icg), icg.cg_freeoff - icg.cg_iusedoff);
2200 	icg.cg_cs.cs_nifree += sblock.fs_ipg;
	/* in the first group, inodes 0 .. UFSROOTINO-1 are marked in use */
2201 	if (cylno == 0)
2202 		for (i = 0; i < UFSROOTINO; i++) {
2203 			setbit(cg_inosused(&icg), i);
2204 			icg.cg_cs.cs_nifree--;
2205 		}
2206 
2207 	/*
2208 	 * Initialize all the inodes in the cylinder group using
2209 	 * random numbers.
2210 	 */
2211 	size = sblock.fs_ipg * sizeof (struct dinode);
2212 	inode_buffer = (struct dinode *)getbuf(&inodebuf, size);
2213 
2214 	for (i = 0; i < sblock.fs_ipg; i++) {
2215 		IRANDOMIZE(&(inode_buffer[i].di_ic));
2216 	}
2217 
2218 	/*
2219 	 * Write all inodes in a single write for performance.
2220 	 */
2221 	awtfs(fsbtodb(&sblock, (uint64_t)cgimin(&sblock, cylno)), (int)size,
2222 	    (char *)inode_buffer, RELEASE);
2223 
2224 	bzero((caddr_t)cg_blktot(&icg), icg.cg_boff - icg.cg_btotoff);
2225 	bzero((caddr_t)cg_blks(&sblock, &icg, 0),
2226 	    icg.cg_iusedoff - icg.cg_boff);
2227 	bzero((caddr_t)cg_blksfree(&icg), icg.cg_nextfreeoff - icg.cg_freeoff);
2228 
	/*
	 * In every group except the first, the blocks that precede the
	 * group's super-block copy (offsets below dlower) are marked free
	 * and counted as data space.
	 */
2229 	if (cylno > 0) {
2230 		for (d = 0; d < dlower; d += sblock.fs_frag) {
2231 			setblock(&sblock, cg_blksfree(&icg), d/sblock.fs_frag);
2232 			icg.cg_cs.cs_nbfree++;
2233 			cg_blktot(&icg)[cbtocylno(&sblock, d)]++;
2234 			cg_blks(&sblock, &icg, cbtocylno(&sblock, d))
2235 			    [cbtorpos(&sblock, d)]++;
2236 		}
2237 		sblock.fs_dsize += dlower;
2238 	}
2239 	sblock.fs_dsize += icg.cg_ndblk - dupper;
	/*
	 * If the metadata does not end on a block boundary, free the
	 * remaining fragments of that block one by one; this also
	 * advances dupper to the next block boundary.
	 */
2240 	if ((i = dupper % sblock.fs_frag) != 0) {
2241 		icg.cg_frsum[sblock.fs_frag - i]++;
2242 		for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) {
2243 			setbit(cg_blksfree(&icg), dupper);
2244 			icg.cg_cs.cs_nffree++;
2245 		}
2246 	}
2247 
2248 	/*
2249 	 * WARNING: The following code is somewhat confusing, but
2250 	 * results in a substantial performance improvement in mkfs.
2251 	 *
2252 	 * Instead of using cbtocylno() and cbtorpos() macros, we
2253 	 * keep track of all the intermediate state of those macros
2254 	 * in some variables.  This allows simple addition to be
2255 	 * done to calculate the results as we step through the
2256 	 * blocks in an orderly fashion instead of the slower
2257 	 * multiplication and division the macros are forced to
2258 	 * use so they can support random input.  (Multiplication,
2259 	 * division, and remainder operations typically take about
2260 	 * 10x as many processor cycles as other operations.)
2261 	 *
2262 	 * The basic idea is to take code:
2263 	 *
2264 	 *	for (x = starting_x; x < max; x++)
2265 	 *		y = (x * c) / z
2266 	 *
2267 	 * and rewrite it to take advantage of the fact that
2268 	 * the variable x is incrementing in an orderly way:
2269 	 *
2270 	 *	intermediate = starting_x * c
2271 	 *	yval = intermediate / z
2272 	 *	for (x = starting_x; x < max; x++) {
2273 	 *		y = yval;
2274 	 *		intermediate += c
2275 	 *		if (intermediate >= z) {
2276 	 *			yval++;
2277 	 *			intermediate -= z
2278 	 *		}
2279 	 *	}
2280 	 *
2281 	 * Performance has improved as much as 4X using this code.
2282 	 */
2283 
2284 	/*
2285 	 * Initialize the starting points for all the cbtocylno()
2286 	 * macro variables and figure out the increments needed each
2287 	 * time through the loop.
2288 	 */
2289 	cbcylno_sect = dupper * NSPF(&sblock);
2290 	cbsect_incr = sblock.fs_frag * NSPF(&sblock);
2291 	cbcylno = cbcylno_sect / sblock.fs_spc;
2292 	cbcylno_sect %= sblock.fs_spc;
2293 	cgblks = cg_blks(&sblock, &icg, cbcylno);
2294 	bno = dupper / sblock.fs_frag;
2295 
2296 	/*
2297 	 * Initialize the starting points for all the cbtorpos()
2298 	 * macro variables and figure out the increments needed each
2299 	 * time through the loop.
2300 	 *
2301 	 * It's harder to simplify the cbtorpos() macro if there were
2302 	 * alternate sectors specified (or if they previously existed
2303 	 * in the growfs case).  Since this is rare, we just revert to
2304 	 * using the macros in this case and skip the variable setup.
2305 	 */
2306 	if (!spc_flag) {
2307 		trackrpos = (cbcylno_sect % sblock.fs_nsect) * sblock.fs_nrpos;
2308 		rpos = trackrpos / sblock.fs_nsect;
2309 		trackoff = trackrpos % sblock.fs_nsect;
2310 		trackoff_incr = cbsect_incr * sblock.fs_nrpos;
2311 		rpos_incr = (trackoff_incr / sblock.fs_nsect) % sblock.fs_nrpos;
2312 		trackoff_incr = trackoff_incr % sblock.fs_nsect;
2313 	}
2314 
2315 	/*
2316 	 * Loop through all the blocks, marking them free and
2317 	 * updating totals kept in the superblock and cg summary.
2318 	 */
2319 	for (d = dupper; d + sblock.fs_frag <= dmax - cbase; ) {
2320 		setblock(&sblock, cg_blksfree(&icg),  bno);
2321 		icg.cg_cs.cs_nbfree++;
2322 
2323 		cg_blktot(&icg)[cbcylno]++;
2324 
2325 		if (!spc_flag)
2326 			cgblks[rpos]++;
2327 		else
2328 			cg_blks(&sblock, &icg, cbtocylno(&sblock, d))
2329 			    [cbtorpos(&sblock, d)]++;
2330 
2331 		d += sblock.fs_frag;
2332 		bno++;
2333 
2334 		/*
2335 		 * Increment the sector offset within the cylinder
2336 		 * for the cbtocylno() macro reimplementation.  If
2337 		 * we're beyond the end of the cylinder, update the
2338 		 * cylinder number, calculate the offset in the
2339 		 * new cylinder, and update the cgblks pointer
2340 		 * to the next rotational position.
2341 		 */
2342 		cbcylno_sect += cbsect_incr;
2343 		if (cbcylno_sect >= sblock.fs_spc) {
2344 			cbcylno++;
2345 			cbcylno_sect -= sblock.fs_spc;
2346 			cgblks += sblock.fs_nrpos;
2347 		}
2348 
2349 		/*
2350 		 * If there aren't alternate sectors, increment the
2351 		 * rotational position variables for the cbtorpos()
2352 		 * reimplementation.  Note that we potentially
2353 		 * increment rpos twice.  Once by rpos_incr, and one
2354 		 * more time when we wrap to a new track because
2355 		 * trackoff >= fs_nsect.
2356 		 */
2357 		if (!spc_flag) {
2358 			trackoff += trackoff_incr;
2359 			rpos += rpos_incr;
2360 			if (trackoff >= sblock.fs_nsect) {
2361 				trackoff -= sblock.fs_nsect;
2362 				rpos++;
2363 			}
2364 			if (rpos >= sblock.fs_nrpos)
2365 				rpos -= sblock.fs_nrpos;
2366 		}
2367 	}
2368 
	/*
	 * Fragments left over past the last whole block are freed
	 * individually and recorded in the fragment summary.
	 */
2369 	if (d < dmax - cbase) {
2370 		icg.cg_frsum[dmax - cbase - d]++;
2371 		for (; d < dmax - cbase; d++) {
2372 			setbit(cg_blksfree(&icg), d);
2373 			icg.cg_cs.cs_nffree++;
2374 		}
2375 	}
	/* fold this group's totals into the file system wide summary */
2376 	sblock.fs_cstotal.cs_ndir += icg.cg_cs.cs_ndir;
2377 	sblock.fs_cstotal.cs_nffree += icg.cg_cs.cs_nffree;
2378 	sblock.fs_cstotal.cs_nbfree += icg.cg_cs.cs_nbfree;
2379 	sblock.fs_cstotal.cs_nifree += icg.cg_cs.cs_nifree;
2380 	*cs = icg.cg_cs;
2381 	awtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, cylno)),
2382 		sblock.fs_bsize, (char *)&icg, RELEASE);
2383 }
2384 
2385 /*
2386  * Data used by fsinit() to initialize the file system:
 * `node' is the in-core inode that fsinit() fills in and hands to
 * iput() for each directory it creates.
2387  */
2388 struct inode node;
2389 
/*
 * PREDEFDIR is the number of entries in root_dir; fsinit() also uses
 * it as the link count of the root directory.
 */
2390 #define	LOSTDIR
2391 #ifdef LOSTDIR
2392 #define	PREDEFDIR 3
2393 #else
2394 #define	PREDEFDIR 2
2395 #endif
2396 
/* prototype entries for the root directory; makedir() sets d_reclen */
2397 struct direct root_dir[] = {
2398 	{ UFSROOTINO, sizeof (struct direct), 1, "." },
2399 	{ UFSROOTINO, sizeof (struct direct), 2, ".." },
2400 #ifdef LOSTDIR
2401 	{ LOSTFOUNDINO, sizeof (struct direct), 10, "lost+found" },
2402 #endif
2403 };
2404 #ifdef LOSTDIR
/*
 * Prototype entries for lost+found.  The third entry (inode 0, record
 * length DIRBLKSIZ) is the one fsinit() copies to the start of each
 * additional DIRBLKSIZ chunk of the directory block.
 */
2405 struct direct lost_found_dir[] = {
2406 	{ LOSTFOUNDINO, sizeof (struct direct), 1, "." },
2407 	{ UFSROOTINO, sizeof (struct direct), 2, ".." },
2408 	{ 0, DIRBLKSIZ, 0, 0 },
2409 };
2410 #endif
/* scratch block in which makedir() assembles a directory for fsinit() */
2411 char buf[MAXBSIZE];
2412 
2413 static void
2414 fsinit()
2415 {
2416 	int i;
2417 
2418 
2419 	/*
2420 	 * initialize the node
2421 	 */
2422 	node.i_atime = mkfstime;
2423 	node.i_mtime = mkfstime;
2424 	node.i_ctime = mkfstime;
2425 #ifdef LOSTDIR
2426 	/*
2427 	 * create the lost+found directory
2428 	 */
2429 	(void) makedir(lost_found_dir, 2);
2430 	for (i = DIRBLKSIZ; i < sblock.fs_bsize; i += DIRBLKSIZ) {
2431 		bcopy(&lost_found_dir[2], &buf[i], DIRSIZ(&lost_found_dir[2]));
2432 	}
2433 	node.i_number = LOSTFOUNDINO;
2434 	node.i_smode = node.i_mode = IFDIR | 0700;
2435 	node.i_nlink = 2;
2436 	node.i_size = sblock.fs_bsize;
2437 	node.i_db[0] = alloc((int)node.i_size, node.i_mode);
2438 	node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size));
2439 	IRANDOMIZE(&node.i_ic);
2440 	wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), (int)node.i_size, buf);
2441 	iput(&node);
2442 #endif
2443 	/*
2444 	 * create the root directory
2445 	 */
2446 	node.i_number = UFSROOTINO;
2447 	node.i_mode = node.i_smode = IFDIR | UMASK;
2448 	node.i_nlink = PREDEFDIR;
2449 	node.i_size = makedir(root_dir, PREDEFDIR);
2450 	node.i_db[0] = alloc(sblock.fs_fsize, node.i_mode);
2451 	/* i_size < 2GB because we are initializing the file system */
2452 	node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size));
2453 	IRANDOMIZE(&node.i_ic);
2454 	wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), sblock.fs_fsize, buf);
2455 	iput(&node);
2456 }
2457 
2458 /*
2459  * construct a set of directory entries in "buf".
2460  * return size of directory.
2461  */
2462 static int
2463 makedir(struct direct *protodir, int entries)
2464 {
2465 	char *cp;
2466 	int i;
2467 	ushort_t spcleft;
2468 
2469 	spcleft = DIRBLKSIZ;
2470 	for (cp = buf, i = 0; i < entries - 1; i++) {
2471 		protodir[i].d_reclen = DIRSIZ(&protodir[i]);
2472 		bcopy(&protodir[i], cp, protodir[i].d_reclen);
2473 		cp += protodir[i].d_reclen;
2474 		spcleft -= protodir[i].d_reclen;
2475 	}
2476 	protodir[i].d_reclen = spcleft;
2477 	bcopy(&protodir[i], cp, DIRSIZ(&protodir[i]));
2478 	return (DIRBLKSIZ);
2479 }
2480 
2481 /*
2482  * allocate a block or frag
2483  */
2484 static daddr32_t
2485 alloc(int size, int mode)
2486 {
2487 	int i, frag;
2488 	daddr32_t d;
2489 
2490 	rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2491 	    (char *)&acg);
2492 	if (acg.cg_magic != CG_MAGIC) {
2493 		(void) fprintf(stderr, gettext("cg 0: bad magic number\n"));
2494 		lockexit(32);
2495 	}
2496 	if (acg.cg_cs.cs_nbfree == 0) {
2497 		(void) fprintf(stderr,
2498 			gettext("first cylinder group ran out of space\n"));
2499 		lockexit(32);
2500 	}
2501 	for (d = 0; d < acg.cg_ndblk; d += sblock.fs_frag)
2502 		if (isblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag))
2503 			goto goth;
2504 	(void) fprintf(stderr,
2505 	    gettext("internal error: can't find block in cyl 0\n"));
2506 	lockexit(32);
2507 goth:
2508 	clrblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag);
2509 	acg.cg_cs.cs_nbfree--;
2510 	sblock.fs_cstotal.cs_nbfree--;
2511 	fscs[0].cs_nbfree--;
2512 	if (mode & IFDIR) {
2513 		acg.cg_cs.cs_ndir++;
2514 		sblock.fs_cstotal.cs_ndir++;
2515 		fscs[0].cs_ndir++;
2516 	}
2517 	cg_blktot(&acg)[cbtocylno(&sblock, d)]--;
2518 	cg_blks(&sblock, &acg, cbtocylno(&sblock, d))[cbtorpos(&sblock, d)]--;
2519 	if (size != sblock.fs_bsize) {
2520 		frag = howmany(size, sblock.fs_fsize);
2521 		fscs[0].cs_nffree += sblock.fs_frag - frag;
2522 		sblock.fs_cstotal.cs_nffree += sblock.fs_frag - frag;
2523 		acg.cg_cs.cs_nffree += sblock.fs_frag - frag;
2524 		acg.cg_frsum[sblock.fs_frag - frag]++;
2525 		for (i = frag; i < sblock.fs_frag; i++)
2526 			setbit(cg_blksfree(&acg), d + i);
2527 	}
2528 	wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2529 	    (char *)&acg);
2530 	return (d);
2531 }
2532 
2533 /*
2534  * Allocate an inode on the disk
2535  */
2536 static void
2537 iput(struct inode *ip)
2538 {
2539 	struct dinode buf[MAXINOPB];
2540 	diskaddr_t d;
2541 
2542 	rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2543 	    (char *)&acg);
2544 	if (acg.cg_magic != CG_MAGIC) {
2545 		(void) fprintf(stderr, gettext("cg 0: bad magic number\n"));
2546 		lockexit(32);
2547 	}
2548 	acg.cg_cs.cs_nifree--;
2549 	setbit(cg_inosused(&acg), ip->i_number);
2550 	wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2551 	    (char *)&acg);
2552 	sblock.fs_cstotal.cs_nifree--;
2553 	fscs[0].cs_nifree--;
2554 	if ((int)ip->i_number >= sblock.fs_ipg * sblock.fs_ncg) {
2555 		(void) fprintf(stderr,
2556 			gettext("fsinit: inode value out of range (%d).\n"),
2557 			ip->i_number);
2558 		lockexit(32);
2559 	}
2560 	d = fsbtodb(&sblock, (uint64_t)itod(&sblock, (int)ip->i_number));
2561 	rdfs(d, sblock.fs_bsize, (char *)buf);
2562 	buf[itoo(&sblock, (int)ip->i_number)].di_ic = ip->i_ic;
2563 	wtfs(d, sblock.fs_bsize, (char *)buf);
2564 }
2565 
2566 /*
2567  * getbuf()	-- Get a buffer for use in an AIO operation.  Buffer
2568  *		is zero'd the first time returned, left with whatever
2569  *		was in memory after that.  This function actually gets
2570  *		enough memory the first time it's called to support
2571  *		MAXBUF buffers like a slab allocator.  When all the
2572  *		buffers are in use, it waits for an aio to complete
2573  *		and make a buffer available.
2574  *
2575  *		Never returns an error.  Either succeeds or exits.
2576  */
2577 static char *
2578 getbuf(bufhdr *bufhead, int size)
2579 {
2580 	bufhdr *pbuf;
2581 	bufhdr *prev;
2582 	int i;
2583 	int buf_size, max_bufs;
2584 
2585 	/*
2586 	 * Initialize all the buffers
2587 	 */
2588 	if (bufhead->head == NULL) {
2589 		/*
2590 		 * round up the size of our buffer header to a
2591 		 * 16 byte boundary so the address we return to
2592 		 * the caller is "suitably aligned".
2593 		 */
2594 		bufhdrsize = (sizeof (bufhdr) + 15) & ~15;
2595 
2596 		/*
2597 		 * Add in our header to the buffer and round it all up to
2598 		 * a 16 byte boundry so each member of the slab is aligned.
2599 		 */
2600 		buf_size = (size + bufhdrsize + 15) & ~15;
2601 
2602 		/*
2603 		 * Limit number of buffers to lesser of MAXBUFMEM's worth
2604 		 * or MAXBUF, whichever is less.
2605 		 */
2606 		max_bufs = MAXBUFMEM / buf_size;
2607 		if (max_bufs > MAXBUF)
2608 			max_bufs = MAXBUF;
2609 
2610 		pbuf = (bufhdr *)calloc(max_bufs, buf_size);
2611 		if (pbuf == NULL) {
2612 			perror("calloc");
2613 			lockexit(32);
2614 		}
2615 
2616 		bufhead->head = bufhead;
2617 		prev = bufhead;
2618 		for (i = 0; i < max_bufs; i++) {
2619 			pbuf->head = bufhead;
2620 			prev->next = pbuf;
2621 			prev = pbuf;
2622 			pbuf = (bufhdr *)((char *)pbuf + buf_size);
2623 		}
2624 	}
2625 
2626 	/*
2627 	 * Get an available buffer, waiting for I/O if necessary
2628 	 */
2629 	wait_for_write(NOBLOCK);
2630 	while (bufhead->next == NULL)
2631 		wait_for_write(BLOCK);
2632 
2633 	/*
2634 	 * Take the buffer off the list
2635 	 */
2636 	pbuf = bufhead->next;
2637 	bufhead->next = pbuf->next;
2638 	pbuf->next = NULL;
2639 
2640 	/*
2641 	 * return the empty buffer space just past the header
2642 	 */
2643 	return ((char *)pbuf + bufhdrsize);
2644 }
2645 
2646 /*
2647  * freebuf()	-- Free a buffer gotten previously through getbuf.
2648  *		Puts the buffer back on the appropriate list for
2649  *		later use.  Never calls free().
2650  *
2651  * Assumes that SIGINT is blocked.
2652  */
2653 static void
2654 freebuf(char *buf)
2655 {
2656 	bufhdr *pbuf;
2657 	bufhdr *bufhead;
2658 
2659 	/*
2660 	 * get the header for this buffer
2661 	 */
2662 	pbuf = (bufhdr *)(buf - bufhdrsize);
2663 
2664 	/*
2665 	 * Put it back on the list of available buffers
2666 	 */
2667 	bufhead = pbuf->head;
2668 	pbuf->next = bufhead->next;
2669 	bufhead->next = pbuf;
2670 }
2671 
2672 /*
2673  * freetrans()	-- Free a transaction gotten previously through getaiop.
2674  *		Puts the transaction struct back on the appropriate list for
2675  *		later use.  Never calls free().
2676  *
2677  * Assumes that SIGINT is blocked.
2678  */
2679 static void
2680 freetrans(aio_trans *transp)
2681 {
2682 	/*
2683 	 * free the buffer associated with this AIO if needed
2684 	 */
2685 	if (transp->release == RELEASE)
2686 		freebuf(transp->buffer);
2687 
2688 	/*
2689 	 * Put transaction on the free list
2690 	 */
2691 	transp->next = results.trans;
2692 	results.trans = transp;
2693 }
2694 
2695 /*
2696  * wait_for_write()	-- Wait for an aio write to complete.  Return
2697  *			the transaction structure for that write.
2698  *
2699  * Blocks SIGINT if necessary.
2700  */
2701 aio_trans *
2702 wait_for_write(int block)
2703 {
2704 	aio_trans	*transp;
2705 	aio_result_t	*resultp;
2706 	static struct timeval  zero_wait = { 0, 0 };
2707 	sigset_t	old_mask;
2708 
2709 	/*
2710 	 * If we know there aren't any outstanding transactions, just return
2711 	 */
2712 	if (results.outstanding == 0)
2713 		return ((aio_trans *) 0);
2714 
2715 	block_sigint(&old_mask);
2716 
2717 	resultp = aiowait(block ? NULL : &zero_wait);
2718 	if (resultp == NULL ||
2719 	    (resultp == (aio_result_t *)-1 && errno == EINVAL)) {
2720 		unblock_sigint(&old_mask);
2721 		return ((aio_trans *) 0);
2722 	}
2723 
2724 	results.outstanding--;
2725 	transp = (aio_trans *)resultp;
2726 
2727 	if (resultp->aio_return != transp->size) {
2728 		if (resultp->aio_return == -1) {
2729 			/*
2730 			 * The aiowrite() may have failed because the
2731 			 * kernel didn't have enough memory to do the job.
2732 			 * Flush all pending writes and try a normal
2733 			 * write().  wtfs_breakup() will call exit if it
2734 			 * fails, so we don't worry about errors here.
2735 			 */
2736 			flush_writes();
2737 			wtfs_breakup(transp->bno, transp->size, transp->buffer);
2738 		} else {
2739 			(void) fprintf(stderr, gettext(
2740 			    "short write (%d of %d bytes) on sector %lld\n"),
2741 			    resultp->aio_return, transp->size,
2742 			    transp->bno);
2743 			/*
2744 			 * Don't unblock SIGINT, to avoid potential
2745 			 * looping due to queued interrupts and
2746 			 * error handling.
2747 			 */
2748 			lockexit(32);
2749 		}
2750 	}
2751 
2752 	resultp->aio_return = 0;
2753 	freetrans(transp);
2754 	unblock_sigint(&old_mask);
2755 	return (transp);
2756 }
2757 
2758 /*
2759  * flush_writes()	-- flush all the outstanding aio writes.
2760  */
2761 static void
2762 flush_writes(void)
2763 {
2764 	while (wait_for_write(BLOCK))
2765 		;
2766 }
2767 
2768 /*
2769  * get_aiop()	-- find and return an aio_trans structure on which a new
2770  *		aio can be done.  Blocks on aiowait() if needed.  Reaps
2771  *		all outstanding completed aio's.
2772  *
2773  * Assumes that SIGINT is blocked.
2774  */
2775 aio_trans *
2776 get_aiop()
2777 {
2778 	int i;
2779 	aio_trans *transp;
2780 	aio_trans *prev;
2781 
2782 	/*
2783 	 * initialize aio stuff
2784 	 */
2785 	if (!aio_inited) {
2786 		aio_inited = 1;
2787 
2788 		results.maxpend = 0;
2789 		results.outstanding = 0;
2790 		results.max = MAXAIO;
2791 
2792 		results.trans = (aio_trans *)calloc(results.max,
2793 						sizeof (aio_trans));
2794 		if (results.trans == NULL) {
2795 			perror("calloc");
2796 			lockexit(32);
2797 		}
2798 
2799 		/*
2800 		 * Initialize the linked list of aio transaction
2801 		 * structures.  Note that the final "next" pointer
2802 		 * will be NULL since we got the buffer from calloc().
2803 		 */
2804 		prev = results.trans;
2805 		for (i = 1; i < results.max; i++) {
2806 			prev->next = &(results.trans[i]);
2807 			prev = prev->next;
2808 		}
2809 	}
2810 
2811 	wait_for_write(NOBLOCK);
2812 	while (results.trans == NULL)
2813 		wait_for_write(BLOCK);
2814 	transp = results.trans;
2815 	results.trans = results.trans->next;
2816 
2817 	transp->next = 0;
2818 	transp->resultbuf.aio_return = AIO_INPROGRESS;
2819 	return (transp);
2820 }
2821 
2822 /*
2823  * read a block from the file system
2824  */
2825 static void
2826 rdfs(diskaddr_t bno, int size, char *bf)
2827 {
2828 	int n, saverr;
2829 
2830 	/*
2831 	 * In case we need any data that's pending in an aiowrite(),
2832 	 * we wait for them all to complete before doing a read.
2833 	 */
2834 	flush_writes();
2835 
2836 	/*
2837 	 * Note: the llseek() can succeed, even if the offset is out of range.
2838 	 * It's not until the file i/o operation (the read()) that one knows
2839 	 * for sure if the raw device can handle the offset.
2840 	 */
2841 	if (llseek(fsi, (offset_t)bno * sectorsize, 0) < 0) {
2842 		saverr = errno;
2843 		(void) fprintf(stderr,
2844 		    gettext("seek error on sector %lld: %s\n"),
2845 		    bno, strerror(saverr));
2846 		lockexit(32);
2847 	}
2848 	n = read(fsi, bf, size);
2849 	if (n != size) {
2850 		saverr = errno;
2851 		if (n == -1)
2852 			(void) fprintf(stderr,
2853 			    gettext("read error on sector %lld: %s\n"),
2854 			    bno, strerror(saverr));
2855 		else
2856 			(void) fprintf(stderr, gettext(
2857 			    "short read (%d of %d bytes) on sector %lld\n"),
2858 			    n, size, bno);
2859 		lockexit(32);
2860 	}
2861 }
2862 
2863 /*
2864  * write a block to the file system
2865  */
2866 static void
2867 wtfs(diskaddr_t bno, int size, char *bf)
2868 {
2869 	int n, saverr;
2870 
2871 	if (fso == -1)
2872 		return;
2873 
2874 	/*
2875 	 * Note: the llseek() can succeed, even if the offset is out of range.
2876 	 * It's not until the file i/o operation (the write()) that one knows
2877 	 * for sure if the raw device can handle the offset.
2878 	 */
2879 	if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) {
2880 		saverr = errno;
2881 		(void) fprintf(stderr,
2882 		    gettext("seek error on sector %lld: %s\n"),
2883 		    bno, strerror(saverr));
2884 		lockexit(32);
2885 	}
2886 	if (Nflag)
2887 		return;
2888 	n = write(fso, bf, size);
2889 	if (n != size) {
2890 		saverr = errno;
2891 		if (n == -1)
2892 			(void) fprintf(stderr,
2893 			    gettext("write error on sector %lld: %s\n"),
2894 			    bno, strerror(saverr));
2895 		else
2896 			(void) fprintf(stderr, gettext(
2897 			    "short write (%d of %d bytes) on sector %lld\n"),
2898 			    n, size, bno);
2899 		lockexit(32);
2900 	}
2901 }
2902 
2903 /*
2904  * write a block to the file system -- buffered with aio
2905  */
2906 static void
2907 awtfs(diskaddr_t bno, int size, char *bf, int release)
2908 {
2909 	int n;
2910 	aio_trans 	*transp;
2911 	sigset_t 	old_mask;
2912 
2913 	if (fso == -1)
2914 		return;
2915 
2916 	/*
2917 	 * We need to keep things consistent if we get interrupted,
2918 	 * so defer any expected interrupts for the time being.
2919 	 */
2920 	block_sigint(&old_mask);
2921 
2922 	if (Nflag) {
2923 		if (release == RELEASE)
2924 			freebuf(bf);
2925 	} else {
2926 		transp = get_aiop();
2927 		transp->bno = bno;
2928 		transp->buffer = bf;
2929 		transp->size = size;
2930 		transp->release = release;
2931 
2932 		n = aiowrite(fso, bf, size, (off_t)bno * sectorsize,
2933 				SEEK_SET, &transp->resultbuf);
2934 
2935 		if (n < 0) {
2936 			/*
2937 			 * The aiowrite() may have failed because the
2938 			 * kernel didn't have enough memory to do the job.
2939 			 * Flush all pending writes and try a normal
2940 			 * write().  wtfs_breakup() will call exit if it
2941 			 * fails, so we don't worry about errors here.
2942 			 */
2943 			flush_writes();
2944 			wtfs_breakup(transp->bno, transp->size, transp->buffer);
2945 			freetrans(transp);
2946 		} else {
2947 			/*
2948 			 * Keep track of our pending writes.
2949 			 */
2950 			results.outstanding++;
2951 			if (results.outstanding > results.maxpend)
2952 			    results.maxpend = results.outstanding;
2953 		}
2954 	}
2955 
2956 	unblock_sigint(&old_mask);
2957 }
2958 
2959 
2960 /*
2961  * write a block to the file system, but break it up into sbsize
2962  * chunks to avoid forcing a large amount of memory to be locked down.
2963  * Only used as a fallback when an aio write has failed.
2964  */
2965 static void
2966 wtfs_breakup(diskaddr_t bno, int size, char *bf)
2967 {
2968 	int n, saverr;
2969 	int wsize;
2970 	int block_incr = sbsize / sectorsize;
2971 
2972 	if (size < sbsize)
2973 		wsize = size;
2974 	else
2975 		wsize = sbsize;
2976 
2977 	n = 0;
2978 	while (size) {
2979 		/*
2980 		 * Note: the llseek() can succeed, even if the offset is
2981 		 * out of range.  It's not until the file i/o operation
2982 		 * (the write()) that one knows for sure if the raw device
2983 		 * can handle the offset.
2984 		 */
2985 		if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) {
2986 			saverr = errno;
2987 			(void) fprintf(stderr,
2988 			    gettext("seek error on sector %lld: %s\n"),
2989 			    bno, strerror(saverr));
2990 			lockexit(32);
2991 		}
2992 
2993 		n = write(fso, bf, wsize);
2994 		if (n == -1) {
2995 			saverr = errno;
2996 			(void) fprintf(stderr,
2997 			    gettext("write error on sector %lld: %s\n"),
2998 			    bno, strerror(saverr));
2999 			lockexit(32);
3000 		}
3001 		if (n != wsize) {
3002 			saverr = errno;
3003 			(void) fprintf(stderr, gettext(
3004 			    "short write (%d of %d bytes) on sector %lld\n"),
3005 			    n, size, bno);
3006 			lockexit(32);
3007 		}
3008 
3009 		bno += block_incr;
3010 		bf += wsize;
3011 		size -= wsize;
3012 		if (size < wsize)
3013 			wsize = size;
3014 	}
3015 }
3016 
3017 
3018 /*
3019  * check if a block is available
3020  */
3021 static int
3022 isblock(struct fs *fs, unsigned char *cp, int h)
3023 {
3024 	unsigned char mask;
3025 
3026 	switch (fs->fs_frag) {
3027 	case 8:
3028 		return (cp[h] == 0xff);
3029 	case 4:
3030 		mask = 0x0f << ((h & 0x1) << 2);
3031 		return ((cp[h >> 1] & mask) == mask);
3032 	case 2:
3033 		mask = 0x03 << ((h & 0x3) << 1);
3034 		return ((cp[h >> 2] & mask) == mask);
3035 	case 1:
3036 		mask = 0x01 << (h & 0x7);
3037 		return ((cp[h >> 3] & mask) == mask);
3038 	default:
3039 		(void) fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag);
3040 		return (0);
3041 	}
3042 }
3043 
3044 /*
3045  * take a block out of the map
3046  */
3047 static void
3048 clrblock(struct fs *fs, unsigned char *cp, int h)
3049 {
3050 	switch ((fs)->fs_frag) {
3051 	case 8:
3052 		cp[h] = 0;
3053 		return;
3054 	case 4:
3055 		cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
3056 		return;
3057 	case 2:
3058 		cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
3059 		return;
3060 	case 1:
3061 		cp[h >> 3] &= ~(0x01 << (h & 0x7));
3062 		return;
3063 	default:
3064 		(void) fprintf(stderr,
3065 		    gettext("clrblock: bad fs_frag value %d\n"), fs->fs_frag);
3066 		return;
3067 	}
3068 }
3069 
3070 /*
3071  * put a block into the map
3072  */
3073 static void
3074 setblock(struct fs *fs, unsigned char *cp, int h)
3075 {
3076 	switch (fs->fs_frag) {
3077 	case 8:
3078 		cp[h] = 0xff;
3079 		return;
3080 	case 4:
3081 		cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
3082 		return;
3083 	case 2:
3084 		cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
3085 		return;
3086 	case 1:
3087 		cp[h >> 3] |= (0x01 << (h & 0x7));
3088 		return;
3089 	default:
3090 		(void) fprintf(stderr,
3091 		    gettext("setblock: bad fs_frag value %d\n"), fs->fs_frag);
3092 		return;
3093 	}
3094 }
3095 
3096 static void
3097 usage()
3098 {
3099 	(void) fprintf(stderr,
3100 	    gettext("ufs usage: mkfs [-F FSType] [-V] [-m] [-o options] "
3101 		"special "			/* param 0 */
3102 		"size(sectors) \\ \n"));	/* param 1 */
3103 	(void) fprintf(stderr,
3104 		"[nsect "			/* param 2 */
3105 		"ntrack "			/* param 3 */
3106 		"bsize "			/* param 4 */
3107 		"fragsize "			/* param 5 */
3108 		"cpg "				/* param 6 */
3109 		"free "				/* param 7 */
3110 		"rps "				/* param 8 */
3111 		"nbpi "				/* param 9 */
3112 		"opt "				/* param 10 */
3113 		"apc "				/* param 11 */
3114 		"gap "				/* param 12 */
3115 		"nrpos "			/* param 13 */
3116 		"maxcontig "			/* param 14 */
3117 		"mtb]\n");			/* param 15 */
3118 	(void) fprintf(stderr,
3119 		gettext(" -m : dump fs cmd line used to make this partition\n"
3120 		" -V :print this command line and return\n"
3121 		" -o :ufs options: :nsect=%d,ntrack=%d,bsize=%d,fragsize=%d\n"
3122 		" -o :ufs options: :cgsize=%d,free=%d,rps=%d,nbpi=%d,opt=%c\n"
3123 		" -o :ufs options: :apc=%d,gap=%d,nrpos=%d,maxcontig=%d\n"
3124 		" -o :ufs options: :mtb=%c,calcsb,calcbinsb\n"
3125 "NOTE that all -o suboptions: must be separated only by commas so as to\n"
3126 "be parsed as a single argument\n"),
3127 		nsect, ntrack, bsize, fragsize, cpg, sblock.fs_minfree, rps,
3128 		nbpi, opt, apc, (rotdelay == -1) ? 0 : rotdelay,
3129 		sblock.fs_nrpos, maxcontig, mtb);
3130 	lockexit(32);
3131 }
3132 
3133 /*ARGSUSED*/
3134 static void
3135 dump_fscmd(char *fsys, int fsi)
3136 {
3137 	int64_t used, bpcg, inospercg;
3138 	int64_t nbpi;
3139 	uint64_t nbytes64;
3140 
3141 	bzero((char *)&sblock, sizeof (sblock));
3142 	rdfs((diskaddr_t)SBLOCK, SBSIZE, (char *)&sblock);
3143 
3144 	/*
3145 	 * ensure a valid file system and if not, exit with error or else
3146 	 * we will end up computing block numbers etc and dividing by zero
3147 	 * which will cause floating point errors in this routine.
3148 	 */
3149 
3150 	if ((sblock.fs_magic != FS_MAGIC) &&
3151 	    (sblock.fs_magic != MTB_UFS_MAGIC)) {
3152 	    (void) fprintf(stderr, gettext(
3153 		"[not currently a valid file system - bad superblock]\n"));
3154 		lockexit(32);
3155 	}
3156 
3157 	if (sblock.fs_magic == MTB_UFS_MAGIC &&
3158 	    (sblock.fs_version > MTB_UFS_VERSION_1 ||
3159 	    sblock.fs_version < MTB_UFS_VERSION_MIN)) {
3160 	    (void) fprintf(stderr, gettext(
3161 		"Unknown version of UFS format: %d\n"), sblock.fs_version);
3162 		lockexit(32);
3163 	}
3164 
3165 	/*
3166 	 * Compute a reasonable nbpi value.
3167 	 * The algorithm for "used" is copied from code
3168 	 * in main() verbatim.
3169 	 * The nbpi equation is taken from main where the
3170 	 * fs_ipg value is set for the last time.  The INOPB(...) - 1
3171 	 * is used to account for the roundup.
3172 	 * The problem is that a range of nbpi values map to
3173 	 * the same file system layout.  So it is not possible
3174 	 * to calculate the exact value specified when the file
3175 	 * system was created.  So instead we determine the top
3176 	 * end of the range of values.
3177 	 */
3178 	bpcg = sblock.fs_spc * sectorsize;
3179 	inospercg = (int64_t)roundup(bpcg / sizeof (struct dinode),
3180 	    INOPB(&sblock));
3181 	if (inospercg > MAXIpG(&sblock))
3182 		inospercg = MAXIpG(&sblock);
3183 	used = (int64_t)
3184 	    (sblock.fs_iblkno + inospercg / INOPF(&sblock)) * NSPF(&sblock);
3185 	used *= sectorsize;
3186 	nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
3187 
3188 	/*
3189 	 * The top end of the range of values for nbpi may not be
3190 	 * a valid command line value for mkfs. Report the bottom
3191 	 * end instead.
3192 	 */
3193 	nbpi = (int64_t)(nbytes64 / (sblock.fs_ipg));
3194 
3195 	(void) fprintf(stdout, gettext("mkfs -F ufs -o "), fsys);
3196 	(void) fprintf(stdout, "nsect=%d,ntrack=%d,",
3197 	    sblock.fs_nsect, sblock.fs_ntrak);
3198 	(void) fprintf(stdout, "bsize=%d,fragsize=%d,cgsize=%d,free=%d,",
3199 	    sblock.fs_bsize, sblock.fs_fsize, sblock.fs_cpg, sblock.fs_minfree);
3200 	(void) fprintf(stdout, "rps=%d,nbpi=%lld,opt=%c,apc=%d,gap=%d,",
3201 	    sblock.fs_rps, nbpi, (sblock.fs_optim == FS_OPTSPACE) ? 's' : 't',
3202 	    (sblock.fs_ntrak * sblock.fs_nsect) - sblock.fs_spc,
3203 	    sblock.fs_rotdelay);
3204 	(void) fprintf(stdout, "nrpos=%d,maxcontig=%d,mtb=%c ",
3205 	    sblock.fs_nrpos, sblock.fs_maxcontig,
3206 	    ((sblock.fs_magic == MTB_UFS_MAGIC) ? 'y' : 'n'));
3207 	(void) fprintf(stdout, "%s %lld\n", fsys,
3208 	    fsbtodb(&sblock, sblock.fs_size));
3209 
3210 	bzero((char *)&sblock, sizeof (sblock));
3211 }
3212 
3213 /* number ************************************************************* */
3214 /*									*/
3215 /* Convert a numeric string arg to binary				*/
3216 /*									*/
3217 /* Args:	d_value - default value, if have parse error		*/
3218 /*		param - the name of the argument, for error messages	*/
3219 /*		flags - parser state and what's allowed in the arg	*/
3220 /* Global arg:  string - pointer to command arg				*/
3221 /*									*/
3222 /* Valid forms: 123 | 123k | 123*123 | 123x123				*/
3223 /*									*/
3224 /* Return:	converted number					*/
3225 /*									*/
3226 /* ******************************************************************** */
3227 
3228 static uint64_t
3229 number(uint64_t d_value, char *param, int flags)
3230 {
3231 	char *cs;
3232 	uint64_t n, t;
3233 	uint64_t cut = BIG / 10;    /* limit to avoid overflow */
3234 	int minus = 0;
3235 
3236 	cs = string;
3237 	if (*cs == '-') {
3238 		minus = 1;
3239 		cs += 1;
3240 	}
3241 	if ((*cs < '0') || (*cs > '9')) {
3242 		goto bail_out;
3243 	}
3244 	n = 0;
3245 	while ((*cs >= '0') && (*cs <= '9') && (n <= cut)) {
3246 		n = n*10 + *cs++ - '0';
3247 	}
3248 	if (minus)
3249 	    n = -n;
3250 	for (;;) {
3251 		switch (*cs++) {
3252 		case 'k':
3253 			if (flags & ALLOW_END_ONLY)
3254 				goto bail_out;
3255 			if (n > (BIG / 1024))
3256 				goto overflow;
3257 			n *= 1024;
3258 			continue;
3259 
3260 		case '*':
3261 		case 'x':
3262 			if (flags & ALLOW_END_ONLY)
3263 				goto bail_out;
3264 			string = cs;
3265 			t = number(d_value, param, flags);
3266 			if (n > (BIG / t))
3267 				goto overflow;
3268 			n *= t;
3269 			cs = string + 1; /* adjust for -- below */
3270 
3271 			/* recursion has read rest of expression */
3272 			/* FALLTHROUGH */
3273 
3274 		case ',':
3275 		case '\0':
3276 			cs--;
3277 			string = cs;
3278 			return (n);
3279 
3280 		case '%':
3281 			if (flags & ALLOW_END_ONLY)
3282 				goto bail_out;
3283 			if (flags & ALLOW_PERCENT) {
3284 				flags &= ~ALLOW_PERCENT;
3285 				flags |= ALLOW_END_ONLY;
3286 				continue;
3287 			}
3288 			goto bail_out;
3289 
3290 		case 'm':
3291 			if (flags & ALLOW_END_ONLY)
3292 				goto bail_out;
3293 			if (flags & ALLOW_MS1) {
3294 				flags &= ~ALLOW_MS1;
3295 				flags |= ALLOW_MS2;
3296 				continue;
3297 			}
3298 			goto bail_out;
3299 
3300 		case 's':
3301 			if (flags & ALLOW_END_ONLY)
3302 				goto bail_out;
3303 			if (flags & ALLOW_MS2) {
3304 				flags &= ~ALLOW_MS2;
3305 				flags |= ALLOW_END_ONLY;
3306 				continue;
3307 			}
3308 			goto bail_out;
3309 
3310 		case '0': case '1': case '2': case '3': case '4':
3311 		case '5': case '6': case '7': case '8': case '9':
3312 overflow:
3313 			(void) fprintf(stderr,
3314 			    gettext("mkfs: value for %s overflowed\n"),
3315 			    param);
3316 			while ((*cs != '\0') && (*cs != ','))
3317 				cs++;
3318 			string = cs;
3319 			return (BIG);
3320 
3321 		default:
3322 bail_out:
3323 			(void) fprintf(stderr, gettext(
3324 			    "mkfs: bad numeric arg for %s: \"%s\"\n"),
3325 			    param, string);
3326 			while ((*cs != '\0') && (*cs != ','))
3327 				cs++;
3328 			string = cs;
3329 			if (d_value != NO_DEFAULT) {
3330 				(void) fprintf(stderr,
3331 				    gettext("mkfs: %s reset to default %lld\n"),
3332 				    param, d_value);
3333 				return (d_value);
3334 			}
3335 			lockexit(2);
3336 
3337 		}
3338 	} /* never gets here */
3339 }
3340 
3341 /* match ************************************************************** */
3342 /*									*/
3343 /* Compare two text strings for equality				*/
3344 /*									*/
3345 /* Arg:	 s - pointer to string to match with a command arg		*/
3346 /* Global arg:  string - pointer to command arg				*/
3347 /*									*/
3348 /* Return:	1 if match, 0 if no match				*/
3349 /*		If match, also reset `string' to point to the text	*/
3350 /*		that follows the matching text.				*/
3351 /*									*/
3352 /* ******************************************************************** */
3353 
3354 static int
3355 match(char *s)
3356 {
3357 	char *cs;
3358 
3359 	cs = string;
3360 	while (*cs++ == *s) {
3361 		if (*s++ == '\0') {
3362 			goto true;
3363 		}
3364 	}
3365 	if (*s != '\0') {
3366 		return (0);
3367 	}
3368 
3369 true:
3370 	cs--;
3371 	string = cs;
3372 	return (1);
3373 }
3374 
3375 /*
3376  * GROWFS ROUTINES
3377  */
3378 
3379 /* ARGSUSED */
3380 void
3381 lockexit(int exitstatus)
3382 {
3383 	if (Pflag) {
3384 		/* the probe mode neither changes nor locks the filesystem */
3385 		exit(exitstatus);
3386 	}
3387 
3388 	/*
3389 	 * flush the dirty cylinder group
3390 	 */
3391 	if (inlockexit == 0) {
3392 		inlockexit = 1;
3393 		flcg();
3394 	}
3395 
3396 	if (aio_inited) {
3397 		flush_writes();
3398 	}
3399 
3400 	/*
3401 	 * make sure the file system is unlocked before exiting
3402 	 */
3403 	if ((inlockexit == 1) && (!isbad)) {
3404 		inlockexit = 2;
3405 		ulockfs();
3406 		/*
3407 		 * if logging was enabled, then re-enable it
3408 		 */
3409 		if (waslog) {
3410 			if (rl_log_control(fsys, _FIOLOGENABLE) != RL_SUCCESS) {
3411 				(void) fprintf(stderr, gettext(
3412 					"failed to re-enable logging\n"));
3413 			}
3414 		}
3415 	} else if (grow) {
3416 		if (isbad) {
3417 			(void) fprintf(stderr, gettext(
3418 				"Filesystem is currently inconsistent.  It "
3419 				"must be repaired with fsck(1M)\nbefore being "
3420 				"used.  Use the following command to "
3421 				"do this:\n\n\tfsck %s\n\n"),
3422 					fsys);
3423 
3424 			if (ismounted) {
3425 				(void) fprintf(stderr, gettext(
3426 					"You will be told that the filesystem "
3427 					"is already mounted, and asked if you\n"
3428 					"wish to continue.  Answer `yes' to "
3429 					"this question.\n\n"));
3430 			}
3431 
3432 			(void) fprintf(stderr, gettext(
3433 					"One problem should be reported, that "
3434 					"the summary information is bad.\n"
3435 					"You will then be asked if it "
3436 					"should be salvaged.  Answer `yes' "
3437 					"to\nthis question.\n\n"));
3438 		}
3439 
3440 		if (ismounted) {
3441 			/*
3442 			 * In theory, there's no way to get here without
3443 			 * isbad also being set, but be robust in the
3444 			 * face of future code changes.
3445 			 */
3446 			(void) fprintf(stderr, gettext(
3447 				"The filesystem is currently mounted "
3448 				"read-only and write-locked.  "));
3449 			if (isbad) {
3450 				(void) fprintf(stderr, gettext(
3451 					"After\nrunning fsck, unlock the "
3452 					"filesystem and "));
3453 			} else {
3454 				(void) fprintf(stderr, gettext(
3455 					"Unlock the filesystem\nand "));
3456 			}
3457 
3458 			(void) fprintf(stderr, gettext(
3459 				"re-enable writing with\nthe following "
3460 				"command:\n\n\tlockfs -u %s\n\n"),
3461 					directory);
3462 		}
3463 	}
3464 
3465 	exit(exitstatus);
3466 }
3467 
3468 void
3469 randomgeneration()
3470 {
3471 	int		 i;
3472 	struct dinode	*dp;
3473 
3474 	/*
3475 	 * always perform fsirand(1) function... newfs will notice that
3476 	 * the inodes have been randomized and will not call fsirand itself
3477 	 */
3478 	for (i = 0, dp = zino; i < sblock.fs_inopb; ++i, ++dp)
3479 		IRANDOMIZE(&dp->di_ic);
3480 }
3481 
3482 /*
3483  * Check the size of the summary information.
3484  * Fields in sblock are not changed in this function.
3485  *
3486  * For an 8K filesystem block, the maximum number of cylinder groups is 16384.
3487  *     MAXCSBUFS {32}  *   8K  {FS block size}
3488  *                         divided by (sizeof csum) {16}
3489  *
3490  * Note that MAXCSBUFS is not used in the kernel; as of Solaris 2.6 build 32,
3491  * this is the only place where it's referenced.
3492  */
3493 void
3494 checksummarysize()
3495 {
3496 	diskaddr_t	dmax;
3497 	diskaddr_t	dmin;
3498 	int64_t	cg0frags;
3499 	int64_t	cg0blocks;
3500 	int64_t	maxncg;
3501 	int64_t	maxfrags;
3502 	uint64_t	fs_size;
3503 	uint64_t maxfs_blocks; /* filesystem blocks for max filesystem size */
3504 
3505 	/*
3506 	 * compute the maximum summary info size
3507 	 */
3508 	dmin = cgdmin(&sblock, 0);
3509 	dmax = cgbase(&sblock, 0) + sblock.fs_fpg;
3510 	fs_size = (grow) ? grow_fs_size : sblock.fs_size;
3511 	if (dmax > fs_size)
3512 		dmax = fs_size;
3513 	cg0frags  = dmax - dmin;
3514 	cg0blocks = cg0frags / sblock.fs_frag;
3515 	cg0frags = cg0blocks * sblock.fs_frag;
3516 	maxncg   = (longlong_t)cg0blocks *
3517 	    (longlong_t)(sblock.fs_bsize / sizeof (struct csum));
3518 
3519 	maxfs_blocks = FS_MAX;
3520 
3521 	if (maxncg > ((longlong_t)maxfs_blocks / (longlong_t)sblock.fs_fpg) + 1)
3522 		maxncg = ((longlong_t)maxfs_blocks /
3523 		    (longlong_t)sblock.fs_fpg) + 1;
3524 
3525 	maxfrags = maxncg * (longlong_t)sblock.fs_fpg;
3526 
3527 	if (maxfrags > maxfs_blocks)
3528 		maxfrags = maxfs_blocks;
3529 
3530 
3531 	/*
3532 	 * remember for later processing in extendsummaryinfo()
3533 	 */
3534 	if (test)
3535 		grow_sifrag = dmin + (cg0blocks * sblock.fs_frag);
3536 	if (testfrags == 0)
3537 		testfrags = cg0frags;
3538 	if (testforce)
3539 		if (testfrags > cg0frags) {
3540 			(void) fprintf(stderr,
3541 				gettext("Too many test frags (%lld); "
3542 				"try %lld\n"), testfrags, cg0frags);
3543 			lockexit(32);
3544 		}
3545 
3546 	/*
3547 	 * if summary info is too large (too many cg's) tell the user and exit
3548 	 */
3549 	if ((longlong_t)sblock.fs_size > maxfrags) {
3550 		(void) fprintf(stderr, gettext(
3551 		    "Too many cylinder groups with %llu sectors;\n    try "
3552 		    "increasing cgsize, or decreasing fssize to %llu\n"),
3553 		    fsbtodb(&sblock, (uint64_t)sblock.fs_size),
3554 		    fsbtodb(&sblock, (uint64_t)maxfrags));
3555 		lockexit(32);
3556 	}
3557 }
3558 
3559 void
3560 checksblock()
3561 {
3562 	/*
3563 	 * make sure this is a file system
3564 	 */
3565 	if ((sblock.fs_magic != FS_MAGIC) &&
3566 	    (sblock.fs_magic != MTB_UFS_MAGIC)) {
3567 		(void) fprintf(stderr,
3568 			gettext("Bad superblock; magic number wrong\n"));
3569 		lockexit(32);
3570 	}
3571 
3572 	if (sblock.fs_magic == MTB_UFS_MAGIC &&
3573 	    sblock.fs_version > MTB_UFS_VERSION_1) {
3574 		(void) fprintf(stderr,
3575 			gettext("Unrecognized version of UFS\n"));
3576 		lockexit(32);
3577 	}
3578 
3579 	if (sblock.fs_ncg < 1) {
3580 		(void) fprintf(stderr,
3581 		    gettext("Bad superblock; ncg out of range\n"));
3582 		lockexit(32);
3583 	}
3584 	if (sblock.fs_cpg < 1) {
3585 		(void) fprintf(stderr,
3586 		    gettext("Bad superblock; cpg out of range\n"));
3587 		lockexit(32);
3588 	}
3589 	if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl ||
3590 	    (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) {
3591 		(void) fprintf(stderr,
3592 		    gettext("Bad superblock; ncyl out of range\n"));
3593 		lockexit(32);
3594 	}
3595 	if (sblock.fs_sbsize <= 0 || sblock.fs_sbsize > sblock.fs_bsize) {
3596 		(void) fprintf(stderr, gettext(
3597 			"Bad superblock; superblock size out of range\n"));
3598 		lockexit(32);
3599 	}
3600 }
3601 
3602 /*
3603  * Roll the embedded log, if any, and set up the global variables
3604  * islog, islogok and isufslog.
3605  */
3606 static void
3607 logsetup(char *devstr)
3608 {
3609 	void		*buf, *ud_buf;
3610 	extent_block_t	*ebp;
3611 	ml_unit_t	*ul;
3612 	ml_odunit_t	*ud;
3613 
3614 	/*
3615 	 * Does the superblock indicate that we are supposed to have a log ?
3616 	 */
3617 	if (sblock.fs_logbno == 0) {
3618 		/*
3619 		 * No log present, nothing to do.
3620 		 */
3621 		islogok = 0;
3622 		islog = 0;
3623 		isufslog = 0;
3624 		return;
3625 	} else {
3626 		/*
3627 		 * There's a log in a yet unknown state, attempt to roll it.
3628 		 */
3629 		islog = 1;
3630 		islogok = 0;
3631 		isufslog = 0;
3632 
3633 		/*
3634 		 * We failed to roll the log, bail out.
3635 		 */
3636 		if (rl_roll_log(devstr) != RL_SUCCESS)
3637 			return;
3638 
3639 		isufslog = 1;
3640 
3641 		/* log is not okay; check the fs */
3642 		if ((FSOKAY != (sblock.fs_state + sblock.fs_time)) ||
3643 		    (sblock.fs_clean != FSLOG))
3644 			return;
3645 
3646 		/* get the log allocation block */
3647 		buf = (void *)malloc(DEV_BSIZE);
3648 		if (buf == (void *) NULL)
3649 			return;
3650 
3651 		ud_buf = (void *)malloc(DEV_BSIZE);
3652 		if (ud_buf == (void *) NULL) {
3653 			free(buf);
3654 			return;
3655 		}
3656 
3657 		rdfs((diskaddr_t)logbtodb(&sblock, sblock.fs_logbno),
3658 		    DEV_BSIZE, buf);
3659 		ebp = (extent_block_t *)buf;
3660 
3661 		/* log allocation block is not okay; check the fs */
3662 		if (ebp->type != LUFS_EXTENTS) {
3663 			free(buf);
3664 			free(ud_buf);
3665 			return;
3666 		}
3667 
3668 		/* get the log state block(s) */
3669 		rdfs((diskaddr_t)logbtodb(&sblock, ebp->extents[0].pbno),
3670 		    DEV_BSIZE, ud_buf);
3671 		ud = (ml_odunit_t *)ud_buf;
3672 		ul = (ml_unit_t *)malloc(sizeof (*ul));
3673 		ul->un_ondisk = *ud;
3674 
3675 		/* log state is okay */
3676 		if ((ul->un_chksum == ul->un_head_ident + ul->un_tail_ident) &&
3677 		    (ul->un_version == LUFS_VERSION_LATEST) &&
3678 		    (ul->un_badlog == 0))
3679 			islogok = 1;
3680 		free(ud_buf);
3681 		free(buf);
3682 		free(ul);
3683 	}
3684 }
3685 
3686 void
3687 growinit(char *devstr)
3688 {
3689 	int	i;
3690 	char	buf[DEV_BSIZE];
3691 
3692 	/*
3693 	 * Read and verify the superblock
3694 	 */
3695 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
3696 	checksblock();
3697 	if (sblock.fs_postblformat != FS_DYNAMICPOSTBLFMT) {
3698 		(void) fprintf(stderr,
3699 			gettext("old file system format; can't growfs\n"));
3700 		lockexit(32);
3701 	}
3702 
3703 	/*
3704 	 * can't shrink a file system
3705 	 */
3706 	grow_fssize = fsbtodb(&sblock, (uint64_t)sblock.fs_size);
3707 	if (fssize_db < grow_fssize) {
3708 		(void) fprintf(stderr,
3709 		    gettext("%lld sectors < current size of %lld sectors\n"),
3710 		    fssize_db, grow_fssize);
3711 		lockexit(32);
3712 	}
3713 
3714 	/*
3715 	 * can't grow a system to over a terabyte unless it was set up
3716 	 * as an MTB UFS file system.
3717 	 */
3718 	if (mtb == 'y' && sblock.fs_magic != MTB_UFS_MAGIC) {
3719 		if (fssize_db >= SECTORS_PER_TERABYTE) {
3720 			(void) fprintf(stderr, gettext(
3721 "File system was not set up with the multi-terabyte format.\n"));
3722 			(void) fprintf(stderr, gettext(
3723 "Its size cannot be increased to a terabyte or more.\n"));
3724 		} else {
3725 			(void) fprintf(stderr, gettext(
3726 "Cannot convert file system to multi-terabyte format.\n"));
3727 		}
3728 		lockexit(32);
3729 	}
3730 
3731 	logsetup(devstr);
3732 
3733 	/*
3734 	 * can't growfs when logging device has errors
3735 	 */
3736 	if ((islog && !islogok) ||
3737 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) &&
3738 	    (sblock.fs_clean == FSLOG && !islog))) {
3739 		(void) fprintf(stderr,
3740 			gettext("logging device has errors; can't growfs\n"));
3741 		lockexit(32);
3742 	}
3743 
3744 	/*
3745 	 * disable ufs logging for growing
3746 	 */
3747 	if (isufslog) {
3748 		if (rl_log_control(devstr, _FIOLOGDISABLE) != RL_SUCCESS) {
3749 			(void) fprintf(stderr, gettext(
3750 				"failed to disable logging\n"));
3751 			lockexit(32);
3752 		}
3753 		islog = 0;
3754 		waslog = 1;
3755 	}
3756 
3757 	/*
3758 	 * if mounted write lock the file system to be grown
3759 	 */
3760 	if (ismounted)
3761 		wlockfs();
3762 
3763 	/*
3764 	 * refresh dynamic superblock state - disabling logging will have
3765 	 * changed the amount of free space available in the file system
3766 	 */
3767 	rdfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
3768 
3769 	/*
3770 	 * make sure device is big enough
3771 	 */
3772 	rdfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf);
3773 	wtfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf);
3774 
3775 	/*
3776 	 * read current summary information
3777 	 */
3778 	grow_fscs = read_summaryinfo(&sblock);
3779 
3780 	/*
3781 	 * save some current size related fields from the superblock
3782 	 * These are used in extendsummaryinfo()
3783 	 */
3784 	grow_fs_size	= sblock.fs_size;
3785 	grow_fs_ncg	= sblock.fs_ncg;
3786 	grow_fs_csaddr	= (diskaddr_t)sblock.fs_csaddr;
3787 	grow_fs_cssize	= sblock.fs_cssize;
3788 
3789 	/*
3790 	 * save and reset the clean flag
3791 	 */
3792 	if (FSOKAY == (sblock.fs_state + sblock.fs_time))
3793 		grow_fs_clean = sblock.fs_clean;
3794 	else
3795 		grow_fs_clean = FSBAD;
3796 	sblock.fs_clean = FSBAD;
3797 	sblock.fs_state = FSOKAY - sblock.fs_time;
3798 	isbad = 1;
3799 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
3800 }
3801 
3802 void
3803 checkdev(char *rdev, char *bdev)
3804 {
3805 	struct stat64	statarea;
3806 
3807 	if (stat64(bdev, &statarea) < 0) {
3808 		(void) fprintf(stderr, gettext("can't check mount point; "));
3809 		(void) fprintf(stderr, gettext("can't stat %s\n"), bdev);
3810 		lockexit(32);
3811 	}
3812 	if ((statarea.st_mode & S_IFMT) != S_IFBLK) {
3813 		(void) fprintf(stderr, gettext(
3814 		    "can't check mount point; %s is not a block device\n"),
3815 		    bdev);
3816 		lockexit(32);
3817 	}
3818 	if (stat64(rdev, &statarea) < 0) {
3819 		(void) fprintf(stderr, gettext("can't stat %s\n"), rdev);
3820 		lockexit(32);
3821 	}
3822 	if ((statarea.st_mode & S_IFMT) != S_IFCHR) {
3823 		(void) fprintf(stderr,
3824 			gettext("%s is not a character device\n"), rdev);
3825 		lockexit(32);
3826 	}
3827 }
3828 
3829 void
3830 checkmount(struct mnttab *mntp, char *bdevname)
3831 {
3832 	struct stat64	statdir;
3833 	struct stat64	statdev;
3834 
3835 	if (strcmp(bdevname, mntp->mnt_special) == 0) {
3836 		if (stat64(mntp->mnt_mountp, &statdir) == -1) {
3837 			(void) fprintf(stderr, gettext("can't stat %s\n"),
3838 				mntp->mnt_mountp);
3839 			lockexit(32);
3840 		}
3841 		if (stat64(mntp->mnt_special, &statdev) == -1) {
3842 			(void) fprintf(stderr, gettext("can't stat %s\n"),
3843 				mntp->mnt_special);
3844 			lockexit(32);
3845 		}
3846 		if (statdir.st_dev != statdev.st_rdev) {
3847 			(void) fprintf(stderr, gettext(
3848 				"%s is not mounted on %s; mnttab(4) wrong\n"),
3849 				mntp->mnt_special, mntp->mnt_mountp);
3850 			lockexit(32);
3851 		}
3852 		ismounted = 1;
3853 		if (directory) {
3854 			if (strcmp(mntp->mnt_mountp, directory) != 0) {
3855 				(void) fprintf(stderr,
3856 				gettext("%s is mounted on %s, not %s\n"),
3857 				    bdevname, mntp->mnt_mountp, directory);
3858 				lockexit(32);
3859 			}
3860 		} else {
3861 			if (grow)
3862 				(void) fprintf(stderr, gettext(
3863 				    "%s is mounted on %s; can't growfs\n"),
3864 				    bdevname, mntp->mnt_mountp);
3865 			else
3866 				(void) fprintf(stderr,
3867 				    gettext("%s is mounted, can't mkfs\n"),
3868 				    bdevname);
3869 			lockexit(32);
3870 		}
3871 	}
3872 }
3873 
3874 struct dinode	*dibuf	= 0;
3875 diskaddr_t	difrag	= 0;
3876 
3877 struct dinode *
3878 gdinode(ino_t ino)
3879 {
3880 	/*
3881 	 * read the block of inodes containing inode number ino
3882 	 */
3883 	if (dibuf == 0)
3884 		dibuf = (struct dinode *)malloc((unsigned)sblock.fs_bsize);
3885 	if (itod(&sblock, ino) != difrag) {
3886 		difrag = itod(&sblock, ino);
3887 		rdfs(fsbtodb(&sblock, (uint64_t)difrag), (int)sblock.fs_bsize,
3888 			(char *)dibuf);
3889 	}
3890 	return (dibuf + (ino % INOPB(&sblock)));
3891 }
3892 
3893 /*
3894  * structure that manages the frags we need for extended summary info
3895  *	These frags can be:
3896  *		free
3897  *		data  block
3898  *		alloc block
3899  */
3900 struct csfrag {
3901 	struct csfrag	*next;		/* next entry */
3902 	daddr32_t	 ofrag;		/* old frag */
3903 	daddr32_t	 nfrag;		/* new frag */
3904 	long		 cylno;		/* cylno of nfrag */
3905 	long		 frags;		/* number of frags */
3906 	long		 size;		/* size in bytes */
3907 	ino_t		 ino;		/* inode number */
3908 	long		 fixed;		/* Boolean - Already fixed? */
3909 };
3910 struct csfrag	*csfrag;		/* state unknown */
3911 struct csfrag	*csfragino;		/* frags belonging to an inode */
3912 struct csfrag	*csfragfree;		/* frags that are free */
3913 
3914 daddr32_t maxcsfrag	= 0;		/* maximum in range */
3915 daddr32_t mincsfrag	= 0x7fffffff;	/* minimum in range */
3916 
3917 int
3918 csfraginrange(daddr32_t frag)
3919 {
3920 	return ((frag >= mincsfrag) && (frag <= maxcsfrag));
3921 }
3922 
3923 struct csfrag *
3924 findcsfrag(daddr32_t frag, struct csfrag **cfap)
3925 {
3926 	struct csfrag	*cfp;
3927 
3928 	if (!csfraginrange(frag))
3929 		return (NULL);
3930 
3931 	for (cfp = *cfap; cfp; cfp = cfp->next)
3932 		if (cfp->ofrag == frag)
3933 			return (cfp);
3934 	return (NULL);
3935 }
3936 
3937 void
3938 checkindirect(ino_t ino, daddr32_t *fragsp, daddr32_t frag, int level)
3939 {
3940 	int			i;
3941 	int			ne	= sblock.fs_bsize / sizeof (daddr32_t);
3942 	daddr32_t			fsb[MAXBSIZE / sizeof (daddr32_t)];
3943 
3944 	if (frag == 0)
3945 		return;
3946 
3947 	rdfs(fsbtodb(&sblock, frag), (int)sblock.fs_bsize,
3948 	    (char *)fsb);
3949 
3950 	checkdirect(ino, fragsp, fsb, sblock.fs_bsize / sizeof (daddr32_t));
3951 
3952 	if (level)
3953 		for (i = 0; i < ne && *fragsp; ++i)
3954 			checkindirect(ino, fragsp, fsb[i], level-1);
3955 }
3956 
3957 void
3958 addcsfrag(ino_t ino, daddr32_t frag, struct csfrag **cfap)
3959 {
3960 	struct csfrag	*cfp, *curr, *prev;
3961 
3962 	/*
3963 	 * establish a range for faster checking in csfraginrange()
3964 	 */
3965 	if (frag > maxcsfrag)
3966 		maxcsfrag = frag;
3967 	if (frag < mincsfrag)
3968 		mincsfrag = frag;
3969 
3970 	/*
3971 	 * if this frag belongs to an inode and is not the start of a block
3972 	 *	then see if it is part of a frag range for this inode
3973 	 */
3974 	if (ino && (frag % sblock.fs_frag))
3975 		for (cfp = *cfap; cfp; cfp = cfp->next) {
3976 			if (ino != cfp->ino)
3977 				continue;
3978 			if (frag != cfp->ofrag + cfp->frags)
3979 				continue;
3980 			cfp->frags++;
3981 			cfp->size += sblock.fs_fsize;
3982 			return;
3983 		}
3984 	/*
3985 	 * allocate a csfrag entry and insert it in an increasing order into the
3986 	 * specified list
3987 	 */
3988 	cfp = (struct csfrag *)calloc(1, sizeof (struct csfrag));
3989 	cfp->ino	= ino;
3990 	cfp->ofrag	= frag;
3991 	cfp->frags	= 1;
3992 	cfp->size	= sblock.fs_fsize;
3993 	for (prev = NULL, curr = *cfap; curr != NULL;
3994 		prev = curr, curr = curr->next) {
3995 		if (frag < curr->ofrag) {
3996 			cfp->next = curr;
3997 			if (prev)
3998 				prev->next = cfp;	/* middle element */
3999 			else
4000 				*cfap = cfp;		/* first element */
4001 			break;
4002 		}
4003 		if (curr->next == NULL) {
4004 			curr->next = cfp;		/* last element	*/
4005 			break;
4006 		}
4007 	}
4008 	if (*cfap == NULL)	/* will happen only once */
4009 		*cfap = cfp;
4010 }
4011 
4012 void
4013 delcsfrag(daddr32_t frag, struct csfrag **cfap)
4014 {
4015 	struct csfrag	*cfp;
4016 	struct csfrag	**cfpp;
4017 
4018 	/*
4019 	 * free up entry whose beginning frag matches
4020 	 */
4021 	for (cfpp = cfap; *cfpp; cfpp = &(*cfpp)->next) {
4022 		if (frag == (*cfpp)->ofrag) {
4023 			cfp = *cfpp;
4024 			*cfpp = (*cfpp)->next;
4025 			free((char *)cfp);
4026 			return;
4027 		}
4028 	}
4029 }
4030 
4031 /*
4032  * See whether any of the direct blocks in the array pointed by "db" and of
4033  * length "ne" are within the range of frags needed to extend the cylinder
4034  * summary. If so, remove those frags from the "as-yet-unclassified" list
4035  * (csfrag) and add them to the "owned-by-inode" list (csfragino).
4036  * For each such frag found, decrement the frag count pointed to by fragsp.
4037  * "ino" is the inode that contains (either directly or indirectly) the frags
4038  * being checked.
4039  */
4040 void
4041 checkdirect(ino_t ino, daddr32_t *fragsp, daddr32_t *db, int ne)
4042 {
4043 	int	 i;
4044 	int	 j;
4045 	int	 found;
4046 	diskaddr_t	 frag;
4047 
4048 	/*
4049 	 * scan for allocation within the new summary info range
4050 	 */
4051 	for (i = 0; i < ne && *fragsp; ++i) {
4052 		if ((frag = *db++) != 0) {
4053 			found = 0;
4054 			for (j = 0; j < sblock.fs_frag && *fragsp; ++j) {
4055 				if (found || (found = csfraginrange(frag))) {
4056 					addcsfrag(ino, frag, &csfragino);
4057 					delcsfrag(frag, &csfrag);
4058 				}
4059 				++frag;
4060 				--(*fragsp);
4061 			}
4062 		}
4063 	}
4064 }
4065 
4066 void
4067 findcsfragino()
4068 {
4069 	int		 i;
4070 	int		 j;
4071 	daddr32_t		 frags;
4072 	struct dinode	*dp;
4073 
4074 	/*
4075 	 * scan all old inodes looking for allocations in the new
4076 	 * summary info range.  Move the affected frag from the
4077 	 * generic csfrag list onto the `owned-by-inode' list csfragino.
4078 	 */
4079 	for (i = UFSROOTINO; i < grow_fs_ncg*sblock.fs_ipg && csfrag; ++i) {
4080 		dp = gdinode((ino_t)i);
4081 		switch (dp->di_mode & IFMT) {
4082 			case IFSHAD	:
4083 			case IFLNK 	:
4084 			case IFDIR 	:
4085 			case IFREG 	: break;
4086 			default		: continue;
4087 		}
4088 
4089 		frags   = dbtofsb(&sblock, dp->di_blocks);
4090 
4091 		checkdirect((ino_t)i, &frags, &dp->di_db[0], NDADDR+NIADDR);
4092 		for (j = 0; j < NIADDR && frags; ++j)
4093 			checkindirect((ino_t)i, &frags, dp->di_ib[j], j);
4094 	}
4095 }
4096 
4097 void
4098 fixindirect(daddr32_t frag, int level)
4099 {
4100 	int			 i;
4101 	int			 ne	= sblock.fs_bsize / sizeof (daddr32_t);
4102 	daddr32_t			fsb[MAXBSIZE / sizeof (daddr32_t)];
4103 
4104 	if (frag == 0)
4105 		return;
4106 
4107 	rdfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
4108 	    (char *)fsb);
4109 
4110 	fixdirect((caddr_t)fsb, frag, fsb, ne);
4111 
4112 	if (level)
4113 		for (i = 0; i < ne; ++i)
4114 			fixindirect(fsb[i], level-1);
4115 }
4116 
4117 void
4118 fixdirect(caddr_t bp, daddr32_t frag, daddr32_t *db, int ne)
4119 {
4120 	int	 i;
4121 	struct csfrag	*cfp;
4122 
4123 	for (i = 0; i < ne; ++i, ++db) {
4124 		if (*db == 0)
4125 			continue;
4126 		if ((cfp = findcsfrag(*db, &csfragino)) == NULL)
4127 			continue;
4128 		*db = cfp->nfrag;
4129 		cfp->fixed = 1;
4130 		wtfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
4131 		    bp);
4132 	}
4133 }
4134 
4135 void
4136 fixcsfragino()
4137 {
4138 	int		 i;
4139 	struct dinode	*dp;
4140 	struct csfrag	*cfp;
4141 
4142 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4143 		if (cfp->fixed)
4144 			continue;
4145 		dp = gdinode((ino_t)cfp->ino);
4146 		fixdirect((caddr_t)dibuf, difrag, dp->di_db, NDADDR+NIADDR);
4147 		for (i = 0; i < NIADDR; ++i)
4148 			fixindirect(dp->di_ib[i], i);
4149 	}
4150 }
4151 
4152 /*
4153  * Read the cylinders summary information specified by settings in the
4154  * passed 'fs' structure into a new allocated array of csum structures.
4155  * The caller is responsible for freeing the returned array.
4156  * Return a pointer to an array of csum structures.
4157  */
4158 static struct csum *
4159 read_summaryinfo(struct	fs *fsp)
4160 {
4161 	struct csum 	*csp;
4162 	int		i;
4163 
4164 	if ((csp = malloc((size_t)fsp->fs_cssize)) == NULL) {
4165 		(void) fprintf(stderr, gettext("cannot create csum list,"
4166 			" not enough memory\n"));
4167 		exit(32);
4168 	}
4169 
4170 	for (i = 0; i < fsp->fs_cssize; i += fsp->fs_bsize) {
4171 		rdfs(fsbtodb(fsp,
4172 			(uint64_t)(fsp->fs_csaddr + numfrags(fsp, i))),
4173 			(int)(fsp->fs_cssize - i < fsp->fs_bsize ?
4174 			fsp->fs_cssize - i : fsp->fs_bsize),
4175 			((caddr_t)csp) + i);
4176 	}
4177 
4178 	return (csp);
4179 }
4180 
4181 /*
4182  * Check the allocation of fragments that are to be made part of a csum block.
4183  * A fragment is allocated if it is either in the csfragfree list or, it is
4184  * in the csfragino list and has new frags associated with it.
4185  * Return the number of allocated fragments.
4186  */
4187 int64_t
4188 checkfragallocated(daddr32_t frag)
4189 {
4190 	struct 	csfrag	*cfp;
4191 	/*
4192 	 * Since the lists are sorted we can break the search if the asked
4193 	 * frag is smaller then the one in the list.
4194 	 */
4195 	for (cfp = csfragfree; cfp != NULL && frag >= cfp->ofrag;
4196 		cfp = cfp->next) {
4197 		if (frag == cfp->ofrag)
4198 			return (1);
4199 	}
4200 	for (cfp = csfragino; cfp != NULL && frag >= cfp->ofrag;
4201 		cfp = cfp->next) {
4202 		if (frag == cfp->ofrag && cfp->nfrag != 0)
4203 			return (cfp->frags);
4204 	}
4205 
4206 	return (0);
4207 }
4208 
4209 /*
4210  * Figure out how much the filesystem can be grown. The limiting factor is
4211  * the available free space needed to extend the cg summary info block.
4212  * The free space is determined in three steps:
4213  * - Try to extend the cg summary block to the required size.
4214  * - Find free blocks in last cg.
4215  * - Find free space in the last already allocated fragment of the summary info
4216  *   block, and use it for additional csum structures.
4217  * Return the maximum size of the new filesystem or 0 if it can't be grown.
4218  * Please note that this function leaves the global list pointers csfrag,
4219  * csfragfree, and csfragino initialized, and the caller is responsible for
4220  * freeing the lists.
4221  */
4222 diskaddr_t
4223 probe_summaryinfo()
4224 {
4225 	/* fragments by which the csum block can be extended. */
4226 	int64_t 	growth_csum_frags = 0;
4227 	/* fragments by which the filesystem can be extended. */
4228 	int64_t		growth_fs_frags = 0;
4229 	int64_t		new_fs_cssize;	/* size of csum blk in the new FS */
4230 	int64_t		new_fs_ncg;	/* number of cg in the new FS */
4231 	int64_t 	spare_csum;
4232 	daddr32_t	oldfrag_daddr;
4233 	daddr32_t	newfrag_daddr;
4234 	daddr32_t	daddr;
4235 	int		i;
4236 
4237 	/*
4238 	 * read and verify the superblock
4239 	 */
4240 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
4241 	checksblock();
4242 
4243 	/*
4244 	 * check how much we can extend the cg summary info block
4245 	 */
4246 
4247 	/*
4248 	 * read current summary information
4249 	 */
4250 	fscs = read_summaryinfo(&sblock);
4251 
4252 	/*
4253 	 * build list of frags needed for cg summary info block extension
4254 	 */
4255 	oldfrag_daddr = howmany(sblock.fs_cssize, sblock.fs_fsize) +
4256 		sblock.fs_csaddr;
4257 	new_fs_ncg = howmany(dbtofsb(&sblock, fssize_db), sblock.fs_fpg);
4258 	new_fs_cssize = fragroundup(&sblock, new_fs_ncg * sizeof (struct csum));
4259 	newfrag_daddr = howmany(new_fs_cssize, sblock.fs_fsize) +
4260 		sblock.fs_csaddr;
4261 	/*
4262 	 * add all of the frags that are required to grow the cyl summary to the
4263 	 * csfrag list, which is the generic/unknown list, since at this point
4264 	 * we don't yet know the state of those frags.
4265 	 */
4266 	for (daddr = oldfrag_daddr; daddr < newfrag_daddr; daddr++)
4267 		addcsfrag((ino_t)0, daddr, &csfrag);
4268 
4269 	/*
4270 	 * filter free fragments and allocate them. Note that the free frags
4271 	 * must be allocated first otherwise they could be grabbed by
4272 	 * alloccsfragino() for data frags.
4273 	 */
4274 	findcsfragfree();
4275 	alloccsfragfree();
4276 
4277 	/*
4278 	 * filter fragments owned by inodes and allocate them
4279 	 */
4280 	grow_fs_ncg = sblock.fs_ncg; /* findcsfragino() needs this glob. var. */
4281 	findcsfragino();
4282 	alloccsfragino();
4283 
4284 	if (notenoughspace()) {
4285 		/*
4286 		 * check how many consecutive fragments could be allocated
4287 		 * in both lists.
4288 		 */
4289 		int64_t tmp_frags;
4290 		for (daddr = oldfrag_daddr; daddr < newfrag_daddr;
4291 			daddr += tmp_frags) {
4292 			if ((tmp_frags = checkfragallocated(daddr)) > 0)
4293 				growth_csum_frags += tmp_frags;
4294 			else
4295 				break;
4296 		}
4297 	} else {
4298 		/*
4299 		 * We have all we need for the new desired size,
4300 		 * so clean up and report back.
4301 		 */
4302 		return (fssize_db);
4303 	}
4304 
4305 	/*
4306 	 * given the number of fragments by which the csum block can be grown
4307 	 * compute by how many new fragments the FS can be increased.
4308 	 * It is the number of csum instances per fragment multiplied by
4309 	 * `growth_csum_frags' and the number of fragments per cylinder group.
4310 	 */
4311 	growth_fs_frags = howmany(sblock.fs_fsize, sizeof (struct csum)) *
4312 		growth_csum_frags * sblock.fs_fpg;
4313 
4314 	/*
4315 	 * compute free fragments in the last cylinder group
4316 	 */
4317 	rdcg(sblock.fs_ncg - 1);
4318 	growth_fs_frags += sblock.fs_fpg - acg.cg_ndblk;
4319 
4320 	/*
4321 	 * compute how many csum instances are unused in the old csum block.
4322 	 * For each unused csum instance the FS can be grown by one cylinder
4323 	 * group without extending the csum block.
4324 	 */
4325 	spare_csum = howmany(sblock.fs_cssize, sizeof (struct csum)) -
4326 		sblock.fs_ncg;
4327 	if (spare_csum > 0)
4328 		growth_fs_frags += spare_csum * sblock.fs_fpg;
4329 
4330 	/*
4331 	 * recalculate the new filesystem size in sectors, shorten it by
4332 	 * the requested size `fssize_db' if necessary.
4333 	 */
4334 	if (growth_fs_frags > 0) {
4335 		diskaddr_t sect;
4336 		sect = (sblock.fs_size + growth_fs_frags) * sblock.fs_nspf;
4337 		return ((sect > fssize_db) ? fssize_db : sect);
4338 	}
4339 
4340 	return (0);
4341 }
4342 
4343 void
4344 extendsummaryinfo()
4345 {
4346 	int64_t		i;
4347 	int		localtest	= test;
4348 	int64_t		frags;
4349 	daddr32_t		oldfrag;
4350 	daddr32_t		newfrag;
4351 
4352 	/*
4353 	 * if no-write (-N), don't bother
4354 	 */
4355 	if (Nflag)
4356 		return;
4357 
4358 again:
4359 	flcg();
4360 	/*
4361 	 * summary info did not change size -- do nothing unless in test mode
4362 	 */
4363 	if (grow_fs_cssize == sblock.fs_cssize)
4364 		if (!localtest)
4365 			return;
4366 
4367 	/*
4368 	 * build list of frags needed for additional summary information
4369 	 */
4370 	oldfrag = howmany(grow_fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
4371 	newfrag = howmany(sblock.fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
4372 	/*
4373 	 * add all of the frags that are required to grow the cyl summary to the
4374 	 * csfrag list, which is the generic/unknown list, since at this point
4375 	 * we don't yet know the state of those frags.
4376 	 */
4377 	for (i = oldfrag, frags = 0; i < newfrag; ++i, ++frags)
4378 		addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
4379 	/*
4380 	 * reduce the number of data blocks in the file system (fs_dsize) by
4381 	 * the number of frags that need to be added to the cyl summary
4382 	 */
4383 	sblock.fs_dsize -= (newfrag - oldfrag);
4384 
4385 	/*
4386 	 * In test mode, we move more data than necessary from
4387 	 * cylinder group 0.  The lookup/allocate/move code can be
4388 	 * better stressed without having to create HUGE file systems.
4389 	 */
4390 	if (localtest)
4391 		for (i = newfrag; i < grow_sifrag; ++i) {
4392 			if (frags >= testfrags)
4393 				break;
4394 			frags++;
4395 			addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
4396 		}
4397 
4398 	/*
4399 	 * move frags to free or inode lists, depending on owner
4400 	 */
4401 	findcsfragfree();
4402 	findcsfragino();
4403 
4404 	/*
4405 	 * if not all frags can be located, file system must be inconsistent
4406 	 */
4407 	if (csfrag) {
4408 		isbad = 1;	/* should already be set, but make sure */
4409 		lockexit(32);
4410 	}
4411 
4412 	/*
4413 	 * allocate the free frags. Note that the free frags must be allocated
4414 	 * first otherwise they could be grabbed by alloccsfragino() for data
4415 	 * frags.
4416 	 */
4417 	alloccsfragfree();
4418 	/*
4419 	 * allocate extra space for inode frags
4420 	 */
4421 	alloccsfragino();
4422 
4423 	/*
4424 	 * not enough space
4425 	 */
4426 	if (notenoughspace()) {
4427 		unalloccsfragfree();
4428 		unalloccsfragino();
4429 		if (localtest && !testforce) {
4430 			localtest = 0;
4431 			goto again;
4432 		}
4433 		(void) fprintf(stderr, gettext("Not enough free space\n"));
4434 		lockexit(NOTENOUGHSPACE);
4435 	}
4436 
4437 	/*
4438 	 * copy the data from old frags to new frags
4439 	 */
4440 	copycsfragino();
4441 
4442 	/*
4443 	 * fix the inodes to point to the new frags
4444 	 */
4445 	fixcsfragino();
4446 
4447 	/*
4448 	 * We may have moved more frags than we needed.  Free them.
4449 	 */
4450 	rdcg((long)0);
4451 	for (i = newfrag; i <= maxcsfrag; ++i)
4452 		setbit(cg_blksfree(&acg), i-cgbase(&sblock, 0));
4453 	wtcg();
4454 
4455 	flcg();
4456 }
4457 
4458 /*
4459  * Check if all fragments in the `csfragino' list were reallocated.
4460  */
4461 int
4462 notenoughspace()
4463 {
4464 	struct csfrag	*cfp;
4465 
4466 	/*
4467 	 * If any element in the csfragino array has a "new frag location"
4468 	 * of 0, the allocfrags() function was unsuccessful in allocating
4469 	 * space for moving the frag represented by this array element.
4470 	 */
4471 	for (cfp = csfragino; cfp; cfp = cfp->next)
4472 		if (cfp->nfrag == 0)
4473 			return (1);
4474 	return (0);
4475 }
4476 
4477 void
4478 unalloccsfragino()
4479 {
4480 	struct csfrag	*cfp;
4481 
4482 	while ((cfp = csfragino) != NULL) {
4483 		if (cfp->nfrag)
4484 			freefrags(cfp->nfrag, cfp->frags, cfp->cylno);
4485 		delcsfrag(cfp->ofrag, &csfragino);
4486 	}
4487 }
4488 
4489 void
4490 unalloccsfragfree()
4491 {
4492 	struct csfrag	*cfp;
4493 
4494 	while ((cfp = csfragfree) != NULL) {
4495 		freefrags(cfp->ofrag, cfp->frags, cfp->cylno);
4496 		delcsfrag(cfp->ofrag, &csfragfree);
4497 	}
4498 }
4499 
4500 /*
4501  * For each frag in the "as-yet-unclassified" list (csfrag), see if
4502  * it's free (i.e., its bit is set in the free frag bit map).  If so,
4503  * move it from the "as-yet-unclassified" list to the csfragfree list.
4504  */
4505 void
4506 findcsfragfree()
4507 {
4508 	struct csfrag	*cfp;
4509 	struct csfrag	*cfpnext;
4510 
4511 	/*
4512 	 * move free frags onto the free-frag list
4513 	 */
4514 	rdcg((long)0);
4515 	for (cfp = csfrag; cfp; cfp = cfpnext) {
4516 		cfpnext = cfp->next;
4517 		if (isset(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0))) {
4518 			addcsfrag(cfp->ino, cfp->ofrag, &csfragfree);
4519 			delcsfrag(cfp->ofrag, &csfrag);
4520 		}
4521 	}
4522 }
4523 
4524 void
4525 copycsfragino()
4526 {
4527 	struct csfrag	*cfp;
4528 	char		buf[MAXBSIZE];
4529 
4530 	/*
4531 	 * copy data from old frags to newly allocated frags
4532 	 */
4533 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4534 		rdfs(fsbtodb(&sblock, (uint64_t)cfp->ofrag), (int)cfp->size,
4535 		    buf);
4536 		wtfs(fsbtodb(&sblock, (uint64_t)cfp->nfrag), (int)cfp->size,
4537 		    buf);
4538 	}
4539 }
4540 
4541 long	curcylno	= -1;
4542 int	cylnodirty	= 0;
4543 
4544 void
4545 rdcg(long cylno)
4546 {
4547 	if (cylno != curcylno) {
4548 		flcg();
4549 		curcylno = cylno;
4550 		rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
4551 			(int)sblock.fs_cgsize, (char *)&acg);
4552 	}
4553 }
4554 
4555 void
4556 flcg()
4557 {
4558 	if (cylnodirty) {
4559 		if (debug && Pflag) {
4560 			(void) fprintf(stderr,
4561 				"Assert: cylnodirty set in probe mode\n");
4562 			return;
4563 		}
4564 		resetallocinfo();
4565 		wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
4566 			(int)sblock.fs_cgsize, (char *)&acg);
4567 		cylnodirty = 0;
4568 	}
4569 	curcylno = -1;
4570 }
4571 
4572 void
4573 wtcg()
4574 {
4575 	if (!Pflag) {
4576 		/* probe mode should never write to disk */
4577 		cylnodirty = 1;
4578 	}
4579 }
4580 
4581 void
4582 allocfrags(long frags, daddr32_t *fragp, long *cylnop)
4583 {
4584 	int	 i;
4585 	int	 j;
4586 	long	 bits;
4587 	long	 bit;
4588 
4589 	/*
4590 	 * Allocate a free-frag range in an old cylinder group
4591 	 */
4592 	for (i = 0, *fragp = 0; i < grow_fs_ncg; ++i) {
4593 		if (((fscs+i)->cs_nffree < frags) && ((fscs+i)->cs_nbfree == 0))
4594 			continue;
4595 		rdcg((long)i);
4596 		bit = bits = 0;
4597 		while (findfreerange(&bit, &bits)) {
4598 			if (frags <= bits)  {
4599 				for (j = 0; j < frags; ++j)
4600 					clrbit(cg_blksfree(&acg), bit+j);
4601 				wtcg();
4602 				*cylnop = i;
4603 				*fragp  = bit + cgbase(&sblock, i);
4604 				return;
4605 			}
4606 			bit += bits;
4607 		}
4608 	}
4609 }
4610 
4611 /*
4612  * Allocate space for frags that need to be moved in order to free up space for
4613  * expanding the cylinder summary info.
4614  * For each frag that needs to be moved (each frag or range of frags in
4615  * the csfragino list), allocate a new location and store the frag number
4616  * of that new location in the nfrag field of the csfrag struct.
4617  * If a new frag can't be allocated for any element in the csfragino list,
4618  * set the new frag number for that element to 0 and return immediately.
4619  * The notenoughspace() function will detect this condition.
4620  */
4621 void
4622 alloccsfragino()
4623 {
4624 	struct csfrag	*cfp;
4625 
4626 	/*
4627 	 * allocate space for inode frag ranges
4628 	 */
4629 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4630 		allocfrags(cfp->frags, &cfp->nfrag, &cfp->cylno);
4631 		if (cfp->nfrag == 0)
4632 			break;
4633 	}
4634 }
4635 
4636 void
4637 alloccsfragfree()
4638 {
4639 	struct csfrag	*cfp;
4640 
4641 	/*
4642 	 * allocate the free frags needed for extended summary info
4643 	 */
4644 	rdcg((long)0);
4645 
4646 	for (cfp = csfragfree; cfp; cfp = cfp->next)
4647 		clrbit(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0));
4648 
4649 	wtcg();
4650 }
4651 
4652 void
4653 freefrags(daddr32_t frag, long frags, long cylno)
4654 {
4655 	int	i;
4656 
4657 	/*
4658 	 * free frags
4659 	 */
4660 	rdcg(cylno);
4661 	for (i = 0; i < frags; ++i) {
4662 		setbit(cg_blksfree(&acg), (frag+i) - cgbase(&sblock, cylno));
4663 	}
4664 	wtcg();
4665 }
4666 
4667 int
4668 findfreerange(long *bitp, long *bitsp)
4669 {
4670 	long	 bit;
4671 
4672 	/*
4673 	 * find a range of free bits in a cylinder group bit map
4674 	 */
4675 	for (bit = *bitp, *bitsp = 0; bit < acg.cg_ndblk; ++bit)
4676 		if (isset(cg_blksfree(&acg), bit))
4677 			break;
4678 
4679 	if (bit >= acg.cg_ndblk)
4680 		return (0);
4681 
4682 	*bitp  = bit;
4683 	*bitsp = 1;
4684 	for (++bit; bit < acg.cg_ndblk; ++bit, ++(*bitsp)) {
4685 		if ((bit % sblock.fs_frag) == 0)
4686 			break;
4687 		if (isclr(cg_blksfree(&acg), bit))
4688 			break;
4689 	}
4690 	return (1);
4691 }
4692 
4693 void
4694 resetallocinfo()
4695 {
4696 	long	cno;
4697 	long	bit;
4698 	long	bits;
4699 
4700 	/*
4701 	 * Compute the free blocks/frags info and update the appropriate
4702 	 * inmemory superblock, summary info, and cylinder group fields
4703 	 */
4704 	sblock.fs_cstotal.cs_nffree -= acg.cg_cs.cs_nffree;
4705 	sblock.fs_cstotal.cs_nbfree -= acg.cg_cs.cs_nbfree;
4706 
4707 	acg.cg_cs.cs_nffree = 0;
4708 	acg.cg_cs.cs_nbfree = 0;
4709 
4710 	bzero((caddr_t)acg.cg_frsum, sizeof (acg.cg_frsum));
4711 	bzero((caddr_t)cg_blktot(&acg), (int)(acg.cg_iusedoff-acg.cg_btotoff));
4712 
4713 	bit = bits = 0;
4714 	while (findfreerange(&bit, &bits)) {
4715 		if (bits == sblock.fs_frag) {
4716 			acg.cg_cs.cs_nbfree++;
4717 			cno = cbtocylno(&sblock, bit);
4718 			cg_blktot(&acg)[cno]++;
4719 			cg_blks(&sblock, &acg, cno)[cbtorpos(&sblock, bit)]++;
4720 		} else {
4721 			acg.cg_cs.cs_nffree += bits;
4722 			acg.cg_frsum[bits]++;
4723 		}
4724 		bit += bits;
4725 	}
4726 
4727 	*(fscs + acg.cg_cgx) = acg.cg_cs;
4728 
4729 	sblock.fs_cstotal.cs_nffree += acg.cg_cs.cs_nffree;
4730 	sblock.fs_cstotal.cs_nbfree += acg.cg_cs.cs_nbfree;
4731 }
4732 
4733 void
4734 extendcg(long cylno)
4735 {
4736 	int	i;
4737 	diskaddr_t	dupper;
4738 	diskaddr_t	cbase;
4739 	diskaddr_t	dmax;
4740 
4741 	/*
4742 	 * extend the cylinder group at the end of the old file system
4743 	 * if it was partially allocated becase of lack of space
4744 	 */
4745 	flcg();
4746 	rdcg(cylno);
4747 
4748 	dupper = acg.cg_ndblk;
4749 	if (cylno == sblock.fs_ncg - 1)
4750 		acg.cg_ncyl = sblock.fs_ncyl - (sblock.fs_cpg * cylno);
4751 	else
4752 		acg.cg_ncyl = sblock.fs_cpg;
4753 	cbase = cgbase(&sblock, cylno);
4754 	dmax = cbase + sblock.fs_fpg;
4755 	if (dmax > sblock.fs_size)
4756 		dmax = sblock.fs_size;
4757 	acg.cg_ndblk = dmax - cbase;
4758 
4759 	for (i = dupper; i < acg.cg_ndblk; ++i)
4760 		setbit(cg_blksfree(&acg), i);
4761 
4762 	sblock.fs_dsize += (acg.cg_ndblk - dupper);
4763 
4764 	wtcg();
4765 	flcg();
4766 }
4767 
4768 struct lockfs	lockfs;
4769 int		lockfd;
4770 int		islocked;
4771 int		lockfskey;
4772 char		lockfscomment[128];
4773 
4774 void
4775 ulockfs()
4776 {
4777 	/*
4778 	 * if the file system was locked, unlock it before exiting
4779 	 */
4780 	if (islocked == 0)
4781 		return;
4782 
4783 	/*
4784 	 * first, check if the lock held
4785 	 */
4786 	lockfs.lf_flags = LOCKFS_MOD;
4787 	if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) {
4788 		perror(directory);
4789 		lockexit(32);
4790 	}
4791 
4792 	if (LOCKFS_IS_MOD(&lockfs)) {
4793 		(void) fprintf(stderr,
4794 			gettext("FILE SYSTEM CHANGED DURING GROWFS!\n"));
4795 		(void) fprintf(stderr,
4796 			gettext("   See lockfs(1), umount(1), and fsck(1)\n"));
4797 		lockexit(32);
4798 	}
4799 	/*
4800 	 * unlock the file system
4801 	 */
4802 	lockfs.lf_lock  = LOCKFS_ULOCK;
4803 	lockfs.lf_flags = 0;
4804 	lockfs.lf_key   = lockfskey;
4805 	clockfs();
4806 	if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) {
4807 		perror(directory);
4808 		lockexit(32);
4809 	}
4810 }
4811 
4812 void
4813 wlockfs()
4814 {
4815 
4816 	/*
4817 	 * if no-write (-N), don't bother
4818 	 */
4819 	if (Nflag)
4820 		return;
4821 	/*
4822 	 * open the mountpoint, and write lock the file system
4823 	 */
4824 	if ((lockfd = open64(directory, O_RDONLY)) == -1) {
4825 		perror(directory);
4826 		lockexit(32);
4827 	}
4828 
4829 	/*
4830 	 * check if it is already locked
4831 	 */
4832 	if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) {
4833 		perror(directory);
4834 		lockexit(32);
4835 	}
4836 
4837 	if (lockfs.lf_lock != LOCKFS_WLOCK) {
4838 		lockfs.lf_lock  = LOCKFS_WLOCK;
4839 		lockfs.lf_flags = 0;
4840 		lockfs.lf_key   = 0;
4841 		clockfs();
4842 		if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) {
4843 			perror(directory);
4844 			lockexit(32);
4845 		}
4846 	}
4847 	islocked = 1;
4848 	lockfskey = lockfs.lf_key;
4849 }
4850 
4851 void
4852 clockfs()
4853 {
4854 	time_t	t;
4855 	char	*ct;
4856 
4857 	(void) time(&t);
4858 	ct = ctime(&t);
4859 	ct[strlen(ct)-1] = '\0';
4860 
4861 	(void) sprintf(lockfscomment, "%s -- mkfs pid %d", ct, getpid());
4862 	lockfs.lf_comlen  = strlen(lockfscomment)+1;
4863 	lockfs.lf_comment = lockfscomment;
4864 }
4865 
4866 /*
4867  * Write the csum records and the superblock
4868  */
4869 void
4870 wtsb()
4871 {
4872 	long	i;
4873 
4874 	/*
4875 	 * write summary information
4876 	 */
4877 	for (i = 0; i < sblock.fs_cssize; i += sblock.fs_bsize)
4878 		wtfs(fsbtodb(&sblock, (uint64_t)(sblock.fs_csaddr +
4879 			numfrags(&sblock, i))),
4880 			(int)(sblock.fs_cssize - i < sblock.fs_bsize ?
4881 			sblock.fs_cssize - i : sblock.fs_bsize),
4882 			((char *)fscs) + i);
4883 
4884 	/*
4885 	 * write superblock
4886 	 */
4887 	sblock.fs_time = mkfstime;
4888 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
4889 }
4890 
4891 /*
4892  * Verify that the optimization selection is reasonable, and advance
4893  * the global "string" appropriately.
4894  */
4895 static char
4896 checkopt(char *optim)
4897 {
4898 	char	opt;
4899 	int	limit = strcspn(optim, ",");
4900 
4901 	switch (limit) {
4902 	case 0:	/* missing indicator (have comma or nul) */
4903 		(void) fprintf(stderr, gettext(
4904 		    "mkfs: missing optimization flag reset to `t' (time)\n"));
4905 		opt = 't';
4906 		break;
4907 
4908 	case 1: /* single-character indicator */
4909 		opt = *optim;
4910 		if ((opt != 's') && (opt != 't')) {
4911 			(void) fprintf(stderr, gettext(
4912 		    "mkfs: bad optimization value `%c' reset to `t' (time)\n"),
4913 			    opt);
4914 			opt = 't';
4915 		}
4916 		break;
4917 
4918 	default: /* multi-character indicator */
4919 		(void) fprintf(stderr, gettext(
4920 	    "mkfs: bad optimization value `%*.*s' reset to `t' (time)\n"),
4921 		    limit, limit, optim);
4922 		opt = 't';
4923 		break;
4924 	}
4925 
4926 	string += limit;
4927 
4928 	return (opt);
4929 }
4930 
4931 /*
4932  * Verify that the mtb selection is reasonable, and advance
4933  * the global "string" appropriately.
4934  */
4935 static char
4936 checkmtb(char *mtbarg)
4937 {
4938 	char	mtbc;
4939 	int	limit = strcspn(mtbarg, ",");
4940 
4941 	switch (limit) {
4942 	case 0:	/* missing indicator (have comma or nul) */
4943 		(void) fprintf(stderr, gettext(
4944 		    "mkfs: missing mtb flag reset to `n' (no mtb support)\n"));
4945 		mtbc = 'n';
4946 		break;
4947 
4948 	case 1: /* single-character indicator */
4949 		mtbc = tolower(*mtbarg);
4950 		if ((mtbc != 'y') && (mtbc != 'n')) {
4951 			(void) fprintf(stderr, gettext(
4952 		    "mkfs: bad mtb value `%c' reset to `n' (no mtb support)\n"),
4953 			    mtbc);
4954 			mtbc = 'n';
4955 		}
4956 		break;
4957 
4958 	default: /* multi-character indicator */
4959 		(void) fprintf(stderr, gettext(
4960 	    "mkfs: bad mtb value `%*.*s' reset to `n' (no mtb support)\n"),
4961 		    limit, limit, mtbarg);
4962 		opt = 'n';
4963 		break;
4964 	}
4965 
4966 	string += limit;
4967 
4968 	return (mtbc);
4969 }
4970 
4971 /*
4972  * Verify that a value is in a range.  If it is not, resets it to
4973  * its default value if one is supplied, exits otherwise.
4974  *
4975  * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL.
4976  */
4977 static void
4978 range_check(long *varp, char *name, long minimum, long maximum,
4979     long def_val, int user_supplied)
4980 {
4981 	if ((*varp < minimum) || (*varp > maximum)) {
4982 		if (user_supplied != RC_DEFAULT) {
4983 			(void) fprintf(stderr, gettext(
4984 	    "mkfs: bad value for %s: %ld must be between %ld and %ld\n"),
4985 			    name, *varp, minimum, maximum);
4986 		}
4987 		if (def_val != NO_DEFAULT) {
4988 			if (user_supplied) {
4989 				(void) fprintf(stderr,
4990 				    gettext("mkfs: %s reset to default %ld\n"),
4991 				    name, def_val);
4992 			}
4993 			*varp = def_val;
4994 			return;
4995 		}
4996 		lockexit(2);
4997 		/*NOTREACHED*/
4998 	}
4999 }
5000 
5001 /*
5002  * Verify that a value is in a range.  If it is not, resets it to
5003  * its default value if one is supplied, exits otherwise.
5004  *
5005  * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL.
5006  */
5007 static void
5008 range_check_64(uint64_t *varp, char *name, uint64_t minimum, uint64_t maximum,
5009     uint64_t def_val, int user_supplied)
5010 {
5011 	if ((*varp < minimum) || (*varp > maximum)) {
5012 		if (user_supplied != RC_DEFAULT) {
5013 			(void) fprintf(stderr, gettext(
5014 	    "mkfs: bad value for %s: %lld must be between %lld and %lld\n"),
5015 			    name, *varp, minimum, maximum);
5016 		}
5017 		if (def_val != NO_DEFAULT) {
5018 			if (user_supplied) {
5019 				(void) fprintf(stderr,
5020 				    gettext("mkfs: %s reset to default %lld\n"),
5021 				    name, def_val);
5022 			}
5023 			*varp = def_val;
5024 			return;
5025 		}
5026 		lockexit(2);
5027 		/*NOTREACHED*/
5028 	}
5029 }
5030 
5031 /*
5032  * Blocks SIGINT from delivery.  Returns the previous mask in the
5033  * buffer provided, so that mask may be later restored.
5034  */
5035 static void
5036 block_sigint(sigset_t *old_mask)
5037 {
5038 	sigset_t block_mask;
5039 
5040 	if (sigemptyset(&block_mask) < 0) {
5041 		fprintf(stderr, gettext("Could not clear signal mask\n"));
5042 		lockexit(3);
5043 	}
5044 	if (sigaddset(&block_mask, SIGINT) < 0) {
5045 		fprintf(stderr, gettext("Could not set signal mask\n"));
5046 		lockexit(3);
5047 	}
5048 	if (sigprocmask(SIG_BLOCK, &block_mask, old_mask) < 0) {
5049 		fprintf(stderr, gettext("Could not block SIGINT\n"));
5050 		lockexit(3);
5051 	}
5052 }
5053 
5054 /*
5055  * Restores the signal mask that was in force before a call
5056  * to block_sigint().  This may actually still have SIGINT blocked,
5057  * if we've been recursively invoked.
5058  */
5059 static void
5060 unblock_sigint(sigset_t *old_mask)
5061 {
5062 	if (sigprocmask(SIG_UNBLOCK, old_mask, (sigset_t *)NULL) < 0) {
5063 		fprintf(stderr, gettext("Could not restore signal mask\n"));
5064 		lockexit(3);
5065 	}
5066 }
5067 
5068 /*
5069  * Attempt to be somewhat graceful about being interrupted, rather than
5070  * just silently leaving the filesystem in an unusable state.
5071  *
5072  * The kernel has blocked SIGINT upon entry, so we don't have to worry
5073  * about recursion if the user starts pounding on the keyboard.
5074  */
5075 static void
5076 recover_from_sigint(int signum)
5077 {
5078 	if (fso > -1) {
5079 		if ((Nflag != 0) || confirm_abort()) {
5080 			lockexit(4);
5081 		}
5082 	}
5083 }
5084 
/*
 * Ask on stdout whether the grow operation should really be cancelled
 * and read the reply from stdin.  Returns 1 when the reply starts with
 * 'y' or 'Y', or when end-of-file is hit while reading; 0 otherwise.
 */
static int
confirm_abort(void)
{
	char	answer[80];

	printf(gettext("\n\nAborting at this point will leave the filesystem "
		"in an inconsistent\nstate.  If you do choose to stop, "
		"you will be given instructions on how to\nrecover "
		"the filesystem.  Do you wish to cancel the filesystem "
		"grow\noperation (y/n)?"));

	/* no answer available: treat it as a "yes" */
	if (getline(stdin, answer, sizeof (answer)) == EOF)
		answer[0] = 'y';

	printf("\n");
	return (answer[0] == 'y' || answer[0] == 'Y');
}
5105 
/*
 * Read one line from "fp" into "loc", dropping all white space
 * characters.  At most maxlen-1 characters are stored; the rest of the
 * line is read and discarded.  The result is NUL terminated.
 * Returns the number of characters stored, or EOF if end-of-file is
 * reached before a newline (in which case "loc" is not terminated).
 */
static int
getline(FILE *fp, char *loc, int maxlen)
{
	char	*out = loc;
	char	*limit = loc + (maxlen - 1);
	int	ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch))
			continue;
		if (out < limit)
			*out++ = ch;
	}
	*out = 0;
	return (out - loc);
}
5123 
5124 /*
5125  * Calculate the maximum value of cylinders-per-group for a file
5126  * system with the characteristics:
5127  *
5128  *	bsize - file system block size
5129  *	fragsize - frag size
5130  *	nbpi - number of bytes of disk space per inode
5131  *	nrpos - number of rotational positions
5132  *	spc - sectors per cylinder
5133  *
5134  * These five characteristic are not adjustable (by this function).
5135  * The only attribute of the file system which IS adjusted by this
5136  * function in order to maximize cylinders-per-group is the proportion
5137  * of the cylinder group overhead block used for the inode map.  The
5138  * inode map cannot occupy more than one-third of the cylinder group
5139  * overhead block, but it's OK for it to occupy less than one-third
5140  * of the overhead block.
5141  *
5142  * The setting of nbpi determines one possible value for the maximum
5143  * size of a cylinder group.  It does so because it determines the total
5144  * number of inodes in the file system (file system size is fixed, and
5145  * nbpi is fixed, so the total number of inodes is fixed too).  The
5146  * cylinder group has to be small enough so that the number of inodes
5147  * in the cylinder group is less than or equal to the number of bits
5148  * in one-third (or whatever proportion is assumed) of a file system
5149  * block.  The details of the calculation are:
5150  *
5151  *     The macro MAXIpG_B(bsize, inode_divisor) determines the maximum
5152  *     number of inodes that can be in a cylinder group, given the
5153  *     proportion of the cylinder group overhead block used for the
5154  *     inode bitmaps (an inode_divisor of 3 means that 1/3 of the
5155  *     block is used for inode bitmaps; an inode_divisor of 12 means
5156  *     that 1/12 of the block is used for inode bitmaps.)
5157  *
5158  *     Once the number of inodes per cylinder group is known, the
5159  *     maximum value of cylinders-per-group (determined by nbpi)
5160  *     is calculated by the formula
5161  *
5162  *     maxcpg_given_nbpi = (size of a cylinder group)/(size of a cylinder)
5163  *
5164  *			 = (inodes-per-cg * nbpi)/(spc * DEV_BSIZE)
5165  *
5166  *     (Interestingly, the size of the file system never enters
5167  *     into this calculation.)
5168  *
5169  * Another possible value for the maximum cylinder group size is determined
5170  * by frag_size and nrpos.  The frags in the cylinder group must be
5171  * representable in the frag bitmaps in the cylinder overhead block and the
5172  * rotational positions for each cylinder must be represented in the
5173  * rotational position tables.  The calculation of the maximum cpg
5174  * value, given the frag and nrpos vales, is:
5175  *
5176  *     maxcpg_given_fragsize =
5177  *	  (available space in the overhead block) / (size of per-cylinder data)
5178  *
5179  *     The available space in the overhead block =
5180  *	  bsize - sizeof (struct cg) - space_used_for_inode_bitmaps
5181  *
5182  *     The size of the per-cylinder data is:
5183  *	    sizeof(long)            # for the "blocks avail per cylinder" field
5184  *	    + nrpos * sizeof(short)   # for the rotational position table entry
5185  *	    + frags-per-cylinder/NBBY # number of bytes to represent this
5186  *				      # cylinder in the frag bitmap
5187  *
5188  * The two calculated maximum values of cylinder-per-group will typically
5189  * turn out to be different, since they are derived from two different
5190  * constraints.  Usually, maxcpg_given_nbpi is much bigger than
5191  * maxcpg_given_fragsize.  But they can be brought together by
5192  * adjusting the proportion of the overhead block dedicated to
5193  * the inode bitmaps.  Decreasing the proportion of the cylinder
5194  * group overhead block used for inode maps will decrease
5195  * maxcpg_given_nbpi and increase maxcpg_given_fragsize.
5196  *
5197  * This function calculates the initial values of maxcpg_given_nbpi
5198  * and maxcpg_given_fragsize assuming that 1/3 of the cg overhead
5199  * block is used for inode bitmaps.  Then it decreases the proportion
5200  * of the cg overhead block used for inode bitmaps (by increasing
5201  * the value of inode_divisor) until maxcpg_given_nbpi and
5202  * maxcpg_given_fragsize are the same, or stop changing, or
5203  * maxcpg_given_nbpi is less than maxcpg_given_fragsize.
5204  *
5205  * The loop terminates when any of the following occur:
5206  *	* maxcpg_given_fragsize is greater than or equal to
5207  *	  maxcpg_given_nbpi
5208  *	* neither maxcpg_given_fragsize nor maxcpg_given_nbpi
5209  *	  change in the expected direction
5210  *
5211  * The loop is guaranteed to terminate because it only continues
5212  * while maxcpg_given_fragsize and maxcpg_given_nbpi are approaching
5213  * each other.  As soon they cross each other, or neither one changes
5214  * in the direction of the other, or one of them moves in the wrong
5215  * direction, the loop completes.
5216  */
5217 
5218 static long
5219 compute_maxcpg(long bsize, long fragsize, long nbpi, long nrpos, long spc)
5220 {
5221 	int	maxcpg_given_nbpi;	/* in cylinders */
5222 	int	maxcpg_given_fragsize;	/* in cylinders */
5223 	int	spf;			/* sectors per frag */
5224 	int	inode_divisor;
5225 	int	old_max_given_frag = 0;
5226 	int	old_max_given_nbpi = INT_MAX;
5227 
5228 	spf = fragsize / DEV_BSIZE;
5229 	inode_divisor = 3;
5230 
5231 	while (1) {
5232 		maxcpg_given_nbpi =
5233 		    (((int64_t)(MAXIpG_B(bsize, inode_divisor))) * nbpi) /
5234 		    (DEV_BSIZE * ((int64_t)spc));
5235 		maxcpg_given_fragsize =
5236 		    (bsize - (sizeof (struct cg)) - (bsize / inode_divisor)) /
5237 		    (sizeof (long) + nrpos * sizeof (short) +
5238 						(spc / spf) / NBBY);
5239 
5240 		if (maxcpg_given_fragsize >= maxcpg_given_nbpi)
5241 			return (maxcpg_given_nbpi);
5242 
5243 		/*
5244 		 * If neither value moves toward the other, return the
5245 		 * least of the old values (we use the old instead of the
5246 		 * new because: if the old is the same as the new, it
5247 		 * doesn't matter which ones we use.  If one of the
5248 		 * values changed, but in the wrong direction, the
5249 		 * new values are suspect.  Better use the old.  This
5250 		 * shouldn't happen, but it's best to check.
5251 		 */
5252 
5253 		if (!(maxcpg_given_nbpi < old_max_given_nbpi) &&
5254 		    !(maxcpg_given_fragsize > old_max_given_frag))
5255 			return (MIN(old_max_given_nbpi, old_max_given_frag));
5256 
5257 		/*
5258 		 * This is probably impossible, but if one of the maxcpg
5259 		 * values moved in the "right" direction and one moved
5260 		 * in the "wrong" direction (that is, the two values moved
5261 		 * in the same direction), the previous conditional won't
5262 		 * recognize that the values aren't converging (since at
5263 		 * least one value moved in the "right" direction, the
5264 		 * last conditional says "keep going").
5265 		 *
5266 		 * Just to make absolutely certain that the loop terminates,
5267 		 * check for one of the values moving in the "wrong" direction
5268 		 * and terminate the loop if it happens.
5269 		 */
5270 
5271 		if (maxcpg_given_nbpi > old_max_given_nbpi ||
5272 		    maxcpg_given_fragsize < old_max_given_frag)
5273 			return (MIN(old_max_given_nbpi, old_max_given_frag));
5274 
5275 		old_max_given_nbpi = maxcpg_given_nbpi;
5276 		old_max_given_frag = maxcpg_given_fragsize;
5277 
5278 		inode_divisor++;
5279 	}
5280 }
5281 
/*
 * Run "/usr/bin/isainfo -b" and inspect the first line of its output.
 *
 * Returns 1 if that line begins with "64", and 0 otherwise (including
 * when the command cannot be started or produces no output).
 *
 * IFS is reset in the environment before the command is launched.
 */
static int
in_64bit_mode(void)
{
	/*  cmd must be an absolute path, for security */
	char *cmd = "/usr/bin/isainfo -b";
	char reply[BUFSIZ];
	FILE *isa_out;
	int is_64bit;

	putenv("IFS= \t");

	isa_out = popen(cmd, "r");
	if (isa_out == NULL)
		return (0);

	is_64bit = (fgets(reply, BUFSIZ, isa_out) != NULL &&
	    strncmp(reply, "64", 2) == 0);

	(void) pclose(isa_out);
	return (is_64bit);
}
5300 
5301 /*
5302  * validate_size
5303  *
5304  * Return 1 if the device appears to be at least "size" sectors long.
5305  * Return 0 if it's shorter or we can't read it.
5306  */
5307 
5308 static int
5309 validate_size(int fd, diskaddr_t size)
5310 {
5311 	char 		buf[DEV_BSIZE];
5312 	int rc;
5313 
5314 	if ((llseek(fd, (offset_t)((size - 1) * DEV_BSIZE), SEEK_SET) == -1) ||
5315 	    (read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)
5316 		rc = 0;
5317 	else
5318 		rc = 1;
5319 	return (rc);
5320 }
5321 
5322 /*
5323  * Print every field of the calculated superblock, along with
5324  * its value.  To make parsing easier on the caller, the value
5325  * is printed first, then the name.  Additionally, there's only
5326  * one name/value pair per line.  All values are reported in
5327  * hexadecimal (with the traditional 0x prefix), as that's slightly
5328  * easier for humans to read.  Not that they're expected to, but
5329  * debugging happens.
5330  */
5331 static void
5332 dump_sblock(void)
5333 {
5334 	int row, column, pending, written;
5335 	caddr_t source;
5336 
5337 	if (Rflag) {
5338 		pending = sizeof (sblock);
5339 		source = (caddr_t)&sblock;
5340 		do {
5341 			written = write(fileno(stdout), source, pending);
5342 			pending -= written;
5343 			source += written;
5344 		} while ((pending > 0) && (written > 0));
5345 
5346 		if (written < 0) {
5347 			perror(gettext("Binary dump of superblock failed"));
5348 			lockexit(1);
5349 		}
5350 		return;
5351 	} else {
5352 		printf("0x%x sblock.fs_link\n", sblock.fs_link);
5353 		printf("0x%x sblock.fs_rolled\n", sblock.fs_rolled);
5354 		printf("0x%x sblock.fs_sblkno\n", sblock.fs_sblkno);
5355 		printf("0x%x sblock.fs_cblkno\n", sblock.fs_cblkno);
5356 		printf("0x%x sblock.fs_iblkno\n", sblock.fs_iblkno);
5357 		printf("0x%x sblock.fs_dblkno\n", sblock.fs_dblkno);
5358 		printf("0x%x sblock.fs_cgoffset\n", sblock.fs_cgoffset);
5359 		printf("0x%x sblock.fs_cgmask\n", sblock.fs_cgmask);
5360 		printf("0x%x sblock.fs_time\n", sblock.fs_time);
5361 		printf("0x%x sblock.fs_size\n", sblock.fs_size);
5362 		printf("0x%x sblock.fs_dsize\n", sblock.fs_dsize);
5363 		printf("0x%x sblock.fs_ncg\n", sblock.fs_ncg);
5364 		printf("0x%x sblock.fs_bsize\n", sblock.fs_bsize);
5365 		printf("0x%x sblock.fs_fsize\n", sblock.fs_fsize);
5366 		printf("0x%x sblock.fs_frag\n", sblock.fs_frag);
5367 		printf("0x%x sblock.fs_minfree\n", sblock.fs_minfree);
5368 		printf("0x%x sblock.fs_rotdelay\n", sblock.fs_rotdelay);
5369 		printf("0x%x sblock.fs_rps\n", sblock.fs_rps);
5370 		printf("0x%x sblock.fs_bmask\n", sblock.fs_bmask);
5371 		printf("0x%x sblock.fs_fmask\n", sblock.fs_fmask);
5372 		printf("0x%x sblock.fs_bshift\n", sblock.fs_bshift);
5373 		printf("0x%x sblock.fs_fshift\n", sblock.fs_fshift);
5374 		printf("0x%x sblock.fs_maxcontig\n", sblock.fs_maxcontig);
5375 		printf("0x%x sblock.fs_maxbpg\n", sblock.fs_maxbpg);
5376 		printf("0x%x sblock.fs_fragshift\n", sblock.fs_fragshift);
5377 		printf("0x%x sblock.fs_fsbtodb\n", sblock.fs_fsbtodb);
5378 		printf("0x%x sblock.fs_sbsize\n", sblock.fs_sbsize);
5379 		printf("0x%x sblock.fs_csmask\n", sblock.fs_csmask);
5380 		printf("0x%x sblock.fs_csshift\n", sblock.fs_csshift);
5381 		printf("0x%x sblock.fs_nindir\n", sblock.fs_nindir);
5382 		printf("0x%x sblock.fs_inopb\n", sblock.fs_inopb);
5383 		printf("0x%x sblock.fs_nspf\n", sblock.fs_nspf);
5384 		printf("0x%x sblock.fs_optim\n", sblock.fs_optim);
5385 #ifdef _LITTLE_ENDIAN
5386 		printf("0x%x sblock.fs_state\n", sblock.fs_state);
5387 #else
5388 		printf("0x%x sblock.fs_npsect\n", sblock.fs_npsect);
5389 #endif
5390 		printf("0x%x sblock.fs_si\n", sblock.fs_si);
5391 		printf("0x%x sblock.fs_trackskew\n", sblock.fs_trackskew);
5392 		printf("0x%x sblock.fs_id[0]\n", sblock.fs_id[0]);
5393 		printf("0x%x sblock.fs_id[1]\n", sblock.fs_id[1]);
5394 		printf("0x%x sblock.fs_csaddr\n", sblock.fs_csaddr);
5395 		printf("0x%x sblock.fs_cssize\n", sblock.fs_cssize);
5396 		printf("0x%x sblock.fs_cgsize\n", sblock.fs_cgsize);
5397 		printf("0x%x sblock.fs_ntrak\n", sblock.fs_ntrak);
5398 		printf("0x%x sblock.fs_nsect\n", sblock.fs_nsect);
5399 		printf("0x%x sblock.fs_spc\n", sblock.fs_spc);
5400 		printf("0x%x sblock.fs_ncyl\n", sblock.fs_ncyl);
5401 		printf("0x%x sblock.fs_cpg\n", sblock.fs_cpg);
5402 		printf("0x%x sblock.fs_ipg\n", sblock.fs_ipg);
5403 		printf("0x%x sblock.fs_fpg\n", sblock.fs_fpg);
5404 		printf("0x%x sblock.fs_cstotal\n", sblock.fs_cstotal);
5405 		printf("0x%x sblock.fs_fmod\n", sblock.fs_fmod);
5406 		printf("0x%x sblock.fs_clean\n", sblock.fs_clean);
5407 		printf("0x%x sblock.fs_ronly\n", sblock.fs_ronly);
5408 		printf("0x%x sblock.fs_flags\n", sblock.fs_flags);
5409 		printf("0x%x sblock.fs_fsmnt\n", sblock.fs_fsmnt);
5410 		printf("0x%x sblock.fs_cgrotor\n", sblock.fs_cgrotor);
5411 		printf("0x%x sblock.fs_u.fs_csp\n", sblock.fs_u.fs_csp);
5412 		printf("0x%x sblock.fs_cpc\n", sblock.fs_cpc);
5413 
5414 		/*
5415 		 * No macros are defined for the dimensions of the
5416 		 * opostbl array.
5417 		 */
5418 		for (row = 0; row < 16; row++) {
5419 			for (column = 0; column < 8; column++) {
5420 				printf("0x%x sblock.fs_opostbl[%d][%d]\n",
5421 				    sblock.fs_opostbl[row][column],
5422 				    row, column);
5423 			}
5424 		}
5425 
5426 		/*
5427 		 * Ditto the size of sparecon.
5428 		 */
5429 		for (row = 0; row < 51; row++) {
5430 			printf("0x%x sblock.fs_sparecon[%d]\n",
5431 			    sblock.fs_sparecon[row], row);
5432 		}
5433 
5434 		printf("0x%x sblock.fs_version\n", sblock.fs_version);
5435 		printf("0x%x sblock.fs_logbno\n", sblock.fs_logbno);
5436 		printf("0x%x sblock.fs_reclaim\n", sblock.fs_reclaim);
5437 		printf("0x%x sblock.fs_sparecon2\n", sblock.fs_sparecon2);
5438 #ifdef _LITTLE_ENDIAN
5439 		printf("0x%x sblock.fs_npsect\n", sblock.fs_npsect);
5440 #else
5441 		printf("0x%x sblock.fs_state\n", sblock.fs_state);
5442 #endif
5443 		printf("0x%llx sblock.fs_qbmask\n", sblock.fs_qbmask);
5444 		printf("0x%llx sblock.fs_qfmask\n", sblock.fs_qfmask);
5445 		printf("0x%x sblock.fs_postblformat\n", sblock.fs_postblformat);
5446 		printf("0x%x sblock.fs_nrpos\n", sblock.fs_nrpos);
5447 		printf("0x%x sblock.fs_postbloff\n", sblock.fs_postbloff);
5448 		printf("0x%x sblock.fs_rotbloff\n", sblock.fs_rotbloff);
5449 		printf("0x%x sblock.fs_magic\n", sblock.fs_magic);
5450 
5451 		/*
5452 		 * fs_space isn't of much use in this context, so we'll
5453 		 * just ignore it for now.
5454 		 */
5455 	}
5456 }
5457