xref: /titanic_50/usr/src/cmd/fs.d/ufs/mkfs/mkfs.c (revision 554ff184129088135ad2643c1c9832174a17be88)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
28 /*	  All Rights Reserved  	*/
29 
30 /*
31  * University Copyright- Copyright (c) 1982, 1986, 1988
32  * The Regents of the University of California
33  * All Rights Reserved
34  *
35  * University Acknowledgment- Portions of this document are derived from
36  * software developed by the University of California, Berkeley, and its
37  * contributors.
38  */
39 
40 #pragma ident	"%Z%%M%	%I%	%E% SMI"
41 
42 
43 /*
44  * The maximum supported file system size (in sectors) is the
45  * number of frags that can be represented in an int32_t field
46  * (INT_MAX) times the maximum number of sectors per frag.  Since
47  * the maximum frag size is MAXBSIZE, the maximum number of sectors
48  * per frag is MAXBSIZE/DEV_BSIZE.
49  */
50 #define	FS_MAX	(((diskaddr_t)INT_MAX) * (MAXBSIZE/DEV_BSIZE))
51 
52 /*
53  * make file system for cylinder-group style file systems
54  *
55  * usage:
56  *
57  *    mkfs [-F FSType] [-V] [-G [-P]] [-M dirname] [-m] [options]
58  *	[-o specific_options]  special size
59  *	[nsect ntrack bsize fsize cpg	minfree	rps nbpi opt apc rotdelay
60  *	  2     3      4     5     6	7	8   9	 10  11  12
61  *	nrpos maxcontig mtb]
62  *	13    14	15
63  *
64  *  where specific_options are:
65  *	N - no create
66  *	nsect - The number of sectors per track
67  *	ntrack - The number of tracks per cylinder
68  *	bsize - block size
69  *	fragsize - fragment size
70  *	cgsize - The number of disk cylinders per cylinder group.
71  * 	free - minimum free space
72  *	rps - rotational speed (rev/sec).
73  *	nbpi - number of data bytes per allocated inode
74  *	opt - optimization (space, time)
75  *	apc - number of alternates
76  *	gap - gap size
77  *	nrpos - number of rotational positions
78  *	maxcontig - maximum number of logical blocks that will be
79  *		allocated contiguously before inserting rotational delay
80  *	mtb - if "y", set up file system for eventual growth to over
81  *		a terabyte
82  * -P Do not grow the file system, but print on stdout the maximal
83  *    size in sectors to which the file system can be increased. The calculated
84  *    size is limited by the value provided by the operand size.
85  *
86  * Note that -P is a project-private interface and together with -G intended
87  * to be used only by the growfs script. It is therefore purposely not
88  * documented in the man page.
89  * The -P option is covered by PSARC case 2003/422.
90  */
91 
92 /*
93  * The following constants set the defaults used for the number
94  * of sectors/track (fs_nsect), and number of tracks/cyl (fs_ntrak).
95  *
96  *			NSECT		NTRAK
97  *	72MB CDC	18		9
98  *	30MB CDC	18		5
99  *	720KB Diskette	9		2
100  */
101 
102 #define	DFLNSECT	32
103 #define	DFLNTRAK	16
104 
105 /*
106  * The following two constants set the default block and fragment sizes.
107  * Both constants must be a power of 2 and meet the following constraints:
108  *	MINBSIZE <= DESBLKSIZE <= MAXBSIZE
109  *	DEV_BSIZE <= DESFRAGSIZE <= DESBLKSIZE
110  *	DESBLKSIZE / DESFRAGSIZE <= 8
111  */
112 #define	DESBLKSIZE	8192
113 #define	DESFRAGSIZE	1024
114 
115 /*
116  * The maximum number of cylinders in a group depends upon how much
117  * information can be stored on a single cylinder. The default is to
118  * use 16 cylinders per group.  This is effectively tradition - it was
119  * the largest value acceptable under SunOs 4.1
120  */
121 #define	DESCPG		16	/* desired fs_cpg */
122 
123 /*
124  * MINFREE gives the minimum acceptable percentage of file system
125  * blocks which may be free. If the freelist drops below this level
126  * only the superuser may continue to allocate blocks. This may
127  * be set to 0 if no reserve of free blocks is deemed necessary,
128  * however throughput drops by fifty percent if the file system
129  * is run at between 90% and 100% full; thus the default value of
130  * fs_minfree is 10%. With 10% free space, fragmentation is not a
131  * problem, so we choose to optimize for time.
132  */
133 #define	MINFREE		10
134 #define	DEFAULTOPT	FS_OPTTIME
135 
136 /*
137  * ROTDELAY gives the minimum number of milliseconds to initiate
138  * another disk transfer on the same cylinder. It is no longer used
139  * and will always default to 0.
140  */
141 #define	ROTDELAY	0
142 
143 /*
144  * MAXBLKPG determines the maximum number of data blocks which are
145  * placed in a single cylinder group. The default is one indirect
146  * block worth of data blocks.
147  */
148 #define	MAXBLKPG(bsize)	((bsize) / sizeof (daddr32_t))
149 
150 /*
151  * Each file system has a number of inodes statically allocated.
152  * We allocate one inode slot per NBPI bytes, expecting this
153  * to be far more than we will ever need.
154  */
155 #define	NBPI		2048	/* Number Bytes Per Inode */
156 #define	MTB_NBPI	(MB)	/* Number Bytes Per Inode for multi-terabyte */
157 
158 /*
159  * Disks are assumed to rotate at 60HZ, unless otherwise specified.
160  */
161 #define	DEFHZ		60
162 
163 /*
164  * Cylinder group related limits.
165  *
166  * For each cylinder we keep track of the availability of blocks at different
167  * rotational positions, so that we can lay out the data to be picked
168  * up with minimum rotational latency.  NRPOS is the number of rotational
169  * positions which we distinguish.  With NRPOS 8 the resolution of our
170  * summary information is 2ms for a typical 3600 rpm drive.
171  */
172 #define	NRPOS		8	/* number distinct rotational positions */
173 
174 /*
175  * range_check "user_supplied" flag values.
176  */
177 #define	RC_DEFAULT	0
178 #define	RC_KEYWORD	1
179 #define	RC_POSITIONAL	2
180 
181 #ifndef	STANDALONE
182 #include	<stdio.h>
183 #include	<sys/mnttab.h>
184 #endif
185 
186 #include	<stdlib.h>
187 #include	<unistd.h>
188 #include	<malloc.h>
189 #include	<string.h>
190 #include	<strings.h>
191 #include	<ctype.h>
192 #include	<errno.h>
193 #include	<sys/param.h>
194 #include	<time.h>
195 #include	<sys/types.h>
196 #include	<sys/sysmacros.h>
197 #include	<sys/vnode.h>
198 #include	<sys/fs/ufs_fsdir.h>
199 #include	<sys/fs/ufs_inode.h>
200 #include	<sys/fs/ufs_fs.h>
201 #include	<sys/fs/ufs_log.h>
202 #include	<sys/mntent.h>
203 #include	<sys/filio.h>
204 #include	<limits.h>
205 #include	<sys/int_const.h>
206 #include	<signal.h>
207 #include	<sys/efi_partition.h>
208 #include	"roll_log.h"
209 
210 #define	bcopy(f, t, n)    (void) memcpy(t, f, n)
211 #define	bzero(s, n)	(void) memset(s, 0, n)
212 #define	bcmp(s, d, n)	memcmp(s, d, n)
213 
214 #define	index(s, r)	strchr(s, r)
215 #define	rindex(s, r)	strrchr(s, r)
216 
217 #include	<sys/stat.h>
218 #include	<sys/statvfs.h>
219 #include	<locale.h>
220 #include	<fcntl.h>
221 #include 	<sys/isa_defs.h>	/* for ENDIAN defines */
222 #include	<sys/vtoc.h>
223 
224 #include	<sys/dkio.h>
225 #include	<sys/asynch.h>
226 
227 extern offset_t	llseek();
228 extern char	*getfullblkname();
229 extern long	lrand48();
230 
231 extern int	optind;
232 extern char	*optarg;
233 
234 
235 /*
236  * The size of a cylinder group is calculated by CGSIZE. The maximum size
237  * is limited by the fact that cylinder groups are at most one block.
238  * Its size is derived from the size of the maps maintained in the
239  * cylinder group and the (struct cg) size.
240  */
241 #define	CGSIZE(fs) \
242 	/* base cg		*/ (sizeof (struct cg) + \
243 	/* blktot size	*/ (fs)->fs_cpg * sizeof (long) + \
244 	/* blks size	*/ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof (short) + \
245 	/* inode map	*/ howmany((fs)->fs_ipg, NBBY) + \
246 	/* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY))
247 
248 /*
249  * We limit the size of the inode map to be no more than a
250  * third of the cylinder group space, since we must leave at
251  * least an equal amount of space for the block map.
252  *
253  * N.B.: MAXIpG must be a multiple of INOPB(fs).
254  */
255 #define	MAXIpG(fs)	roundup((fs)->fs_bsize * NBBY / 3, INOPB(fs))
256 
257 /*
258  * Same as MAXIpG, but parameterized by the block size (b) and the
259  * cylinder group divisor (d), which is the reciprocal of the fraction of the
260  * cylinder group overhead block that is used for the inode map.  So for
261  * example, if d = 5, the macro's computation assumes that 1/5 of the
262  * cylinder group overhead block can be dedicated to the inode map.
263  */
264 #define	MAXIpG_B(b, d)	roundup((b) * NBBY / (d), (b) / sizeof (struct dinode))
265 
266 #define	UMASK		0755
267 #define	MAXINOPB	(MAXBSIZE / sizeof (struct dinode))
268 #define	POWEROF2(num)	(((num) & ((num) - 1)) == 0)	/* NB: also true for 0 */
269 #define	MB		(1024*1024)
270 #define	BETWEEN(x, l, h)	((x) >= (l) && (x) <= (h))
271 
272 /*
273  * Used to set the inode generation number. Since both inodes and dinodes
274  * are dealt with, we really need a pointer to an icommon here.
 *
 * The expansion stores the result of lrand48() in (icp)->ic_gen.
 *
 * NOTE(review): the expansion already ends in a semicolon, so writing
 * "IRANDOMIZE(p);" yields an assignment followed by an empty statement.
 * That is harmless as a standalone statement but breaks an unbraced
 * "if (...) IRANDOMIZE(p); else ..." construct.  The expansion is also
 * not wrapped in parentheses, so it cannot be used as a sub-expression.
275  */
276 #define	IRANDOMIZE(icp)	(icp)->ic_gen = lrand48();
277 
278 /*
279  * Flags for number()
280  */
281 #define	ALLOW_PERCENT	0x01	/* allow trailing `%' on number */
282 #define	ALLOW_MS1	0x02	/* allow trailing `ms', state 1 */
283 #define	ALLOW_MS2	0x04	/* allow trailing `ms', state 2 */
284 #define	ALLOW_END_ONLY	0x08	/* must be at end of number & suffixes */
285 
286 #define	MAXAIO	1000	/* maximum number of outstanding I/O's we'll manage */
287 #define	BLOCK	1	/* block in aiowait */
288 #define	NOBLOCK	0	/* don't block in aiowait */
289 
290 #define	RELEASE 1	/* free an aio buffer after use */
291 #define	SAVE	0	/* don't free the buffer */
292 
/*
 * Per-request record for asynchronous I/O.  The bno/size/buffer/release
 * members parallel the arguments of awtfs().
 * NOTE(review): field meanings below are inferred from names and from the
 * awtfs() prototype; confirm against awtfs() and wait_for_write().
 */
293 typedef struct aio_trans {
294 	aio_result_t resultbuf;	/* presumably the aio completion status */
295 	diskaddr_t bno;		/* same type as awtfs()'s bno argument */
296 	char *buffer;		/* presumably the data buffer for the request */
297 	int size;		/* presumably the length of buffer; confirm */
298 	int release;		/* presumably RELEASE or SAVE; confirm */
299 	struct aio_trans *next;	/* link to another aio_trans */
300 } aio_trans;
301 
/*
 * Bookkeeping for the set of aio_trans records; a single global instance
 * named "results" is defined below.
 * NOTE(review): field meanings are inferred from names only; confirm
 * against get_aiop() and wait_for_write().
 */
302 typedef struct aio_results {
303 	int max;		/* presumably a request limit (cf. MAXAIO) */
304 	int outstanding;	/* presumably the count of requests in flight */
305 	int maxpend;		/* presumably a high-water mark; confirm */
306 	aio_trans *trans;	/* pointer to aio_trans record(s) */
307 } aio_results;
308 
309 int aio_inited = 0;
310 aio_results results;
311 
312 /*
313  * Allow up to MAXBUF aio requests that each have a unique buffer.
314  * More aio's might be done, but not using memory through the getbuf()
315  * interface.  This can be raised, but you run into the potential of
316  * using more memory than is physically available on the machine,
317  * and if you start swapping, you can forget about performance.
318  * To prevent this, we also limit the total memory used for a given
319  * type of buffer to MAXBUFMEM.
320  *
321  * Tests indicate a cylinder group's worth of inodes takes:
322  *
323  *	NBPI	Size of Inode Buffer
324  *	 2k	1688k
325  *	 8k	 424k
326  *
327  * initcg() stores all the inodes for a cylinder group in one buffer,
328  * so allowing 20 buffers could take 32 MB if not limited by MAXBUFMEM.
329  */
330 #define	MAXBUF		20
331 #define	MAXBUFMEM	(8 * 1024 * 1024)
332 
333 /*
334  * header information for buffers managed by getbuf() and freebuf()
335  */
336 typedef struct bufhdr {
337 	struct bufhdr *head;	/* NOTE(review): presumably the owning list */
				/* anchor (e.g. inodebuf, cgsumbuf); confirm */
338 	struct bufhdr *next;	/* link to another bufhdr */
339 } bufhdr;
340 
341 int bufhdrsize;
342 
343 bufhdr inodebuf = { NULL, NULL };
344 bufhdr cgsumbuf = { NULL, NULL };
345 
346 #define	SECTORS_PER_TERABYTE	(1LL << 31)
347 /*
348  * The following constant specifies an upper limit for file system size
349  * that is actually a lot bigger than we expect to support with UFS. (Since
350  * it's specified in sectors, the file system size would be 2**44 * 512,
351  * which is 2**53, which is 8192 Terabytes.)  However, it's useful
352  * for checking the basic sanity of a size value that is input on the
353  * command line.
354  */
355 #define	FS_SIZE_UPPER_LIMIT	0x100000000000LL
356 
357 /*
358  * Forward declarations
359  */
360 static char *getbuf(bufhdr *bufhead, int size);
361 static void freebuf(char *buf);
362 static void freetrans(aio_trans *transp);
363 static aio_trans *get_aiop();
364 static aio_trans *wait_for_write(int block);
365 static void initcg(int cylno);
366 static void fsinit();
367 static int makedir(struct direct *protodir, int entries);
368 static void iput(struct inode *ip);
369 static void rdfs(diskaddr_t bno, int size, char *bf);
370 static void wtfs(diskaddr_t bno, int size, char *bf);
371 static void awtfs(diskaddr_t bno, int size, char *bf, int release);
372 static void wtfs_breakup(diskaddr_t bno, int size, char *bf);
373 static int isblock(struct fs *fs, unsigned char *cp, int h);
374 static void clrblock(struct fs *fs, unsigned char *cp, int h);
375 static void setblock(struct fs *fs, unsigned char *cp, int h);
376 static void usage();
377 static void dump_fscmd(char *fsys, int fsi);
378 static uint64_t number(uint64_t d_value, char *param, int flags);
379 static int match(char *s);
380 static char checkopt(char *optim);
381 static char checkmtb(char *mtbarg);
382 static void range_check(long *varp, char *name, long minimum,
383     long maximum, long def_val, int user_supplied);
384 static void range_check_64(uint64_t *varp, char *name, uint64_t minimum,
385     uint64_t maximum, uint64_t def_val, int user_supplied);
386 static daddr32_t alloc(int size, int mode);
387 static diskaddr_t get_max_size(int fd);
388 static long get_max_track_size(int fd);
389 static void block_sigint(sigset_t *old_mask);
390 static void unblock_sigint(sigset_t *old_mask);
391 static void recover_from_sigint(int signum);
392 static int confirm_abort(void);
393 static int getline(FILE *fp, char *loc, int maxlen);
394 static void flush_writes(void);
395 static long compute_maxcpg(long, long, long, long, long);
396 static int in_64bit_mode(void);
397 static int validate_size(int fd, diskaddr_t size);
398 
/*
 * Storage for a struct fs, padded so that the object is at least
 * SBSIZE bytes.  The fs member is referred to through the sblock macro.
 */
399 union {
400 	struct fs fs;		/* aliased as "sblock" */
401 	char pad[SBSIZE];	/* forces the union to at least SBSIZE bytes */
402 } fsun;
403 #define	sblock	fsun.fs
404 
405 struct	csum *fscs;
406 
/*
 * Storage for a struct cg, padded so that the object is at least
 * MAXBSIZE bytes.  The cg member is referred to through the acg macro.
 */
407 union cgun {
408 	struct cg cg;		/* aliased as "acg" */
409 	char pad[MAXBSIZE];	/* forces the union to at least MAXBSIZE bytes */
410 } cgun;
411 
412 #define	acg	cgun.cg
413 /*
414  * Size of screen in cols in which to fit output
415  */
416 #define	WIDTH	80
417 
418 struct dinode zino[MAXBSIZE / sizeof (struct dinode)];
419 
420 /*
421  * file descriptors used for rdfs(fsi) and wtfs(fso).
422  * Initialized to an illegal file descriptor number.
423  */
424 int	fsi = -1;
425 int	fso = -1;
426 
427 /*
428  * The BIG parameter is machine dependent.  It should be a longlong integer
429  * constant that can be used by the number parser to check the validity
430  * of numeric parameters.
431  */
432 
433 #define	BIG		0x7fffffffffffffffLL
434 
435 /* Used to indicate to number() that a bogus value should cause us to exit */
436 #define	NO_DEFAULT	LONG_MIN
437 
438 /*
439  * The *_flag variables are used to indicate that the user specified
440  * the values, rather than that we made them up ourselves.  We can
441  * complain about the user giving us bogus values.
442  */
443 
444 /* semi-constants */
445 long	sectorsize = DEV_BSIZE;		/* bytes/sector from param.h */
446 long	bbsize = BBSIZE;		/* boot block size */
447 long	sbsize = SBSIZE;		/* superblock size */
448 
449 /* parameters */
450 diskaddr_t	fssize_db;		/* file system size in disk blocks */
451 diskaddr_t	fssize_frag;		/* file system size in frags */
452 long	cpg;				/* cylinders/cylinder group */
453 int	cpg_flag = RC_DEFAULT;
454 long	rotdelay = -1;			/* rotational delay between blocks */
455 int	rotdelay_flag = RC_DEFAULT;
456 long	maxcontig;			/* max contiguous blocks to allocate */
457 int	maxcontig_flag = RC_DEFAULT;
458 long	nsect = DFLNSECT;		/* sectors per track */
459 int	nsect_flag = RC_DEFAULT;
460 long	ntrack = DFLNTRAK;		/* tracks per cylinder */
461 int	ntrack_flag = RC_DEFAULT;
462 long	bsize = DESBLKSIZE;		/* filesystem block size */
463 int	bsize_flag = RC_DEFAULT;
464 long	fragsize = DESFRAGSIZE; 	/* filesystem fragment size */
465 int	fragsize_flag = RC_DEFAULT;
466 long	minfree = MINFREE; 		/* fs_minfree */
467 int	minfree_flag = RC_DEFAULT;
468 long	rps = DEFHZ;			/* revolutions/second of drive */
469 int	rps_flag = RC_DEFAULT;
470 long	nbpi = NBPI;			/* number of bytes per inode */
471 int	nbpi_flag = RC_DEFAULT;
472 long	nrpos = NRPOS;			/* number of rotational positions */
473 int	nrpos_flag = RC_DEFAULT;
474 long	apc = 0;			/* alternate sectors per cylinder */
475 int	apc_flag = RC_DEFAULT;
476 char	opt = 't';			/* optimization style, `t' or `s' */
477 char	mtb = 'n';			/* multi-terabyte format, 'y' or 'n' */
478 
479 long	debug = 0;			/* enable debugging output */
480 
481 int	spc_flag = 0;			/* alternate sectors specified or */
482 					/* found */
483 
484 /* global state */
485 int	Nflag;		/* do not write to disk */
486 int	mflag;		/* return the command line used to create this FS */
487 char	*fsys;
488 time_t	mkfstime;
489 char	*string;
490 
491 /*
492  * logging support
493  */
494 int	ismdd;			/* true if device is a SVM device */
495 int	islog;			/* true if ufs or SVM logging is enabled */
496 int	islogok;		/* true if ufs/SVM log state is good */
497 
498 static int	isufslog;	/* true if ufs logging is enabled */
499 static int	waslog;		/* true when ufs logging disabled during grow */
500 
501 /*
502  * growfs defines, globals, and forward references
503  */
504 #define	NOTENOUGHSPACE 33
505 int		grow;
506 static int	Pflag;		/* probe to which size the fs can be grown */
507 int		ismounted;
508 char		*directory;
509 diskaddr_t	grow_fssize;
510 long		grow_fs_size;
511 long		grow_fs_ncg;
512 diskaddr_t		grow_fs_csaddr;
513 long		grow_fs_cssize;
514 int		grow_fs_clean;
515 struct csum	*grow_fscs;
516 diskaddr_t		grow_sifrag;
517 int		test;
518 int		testforce;
519 diskaddr_t		testfrags;
520 int		inlockexit;
521 int		isbad;
522 
523 void		lockexit(int);
524 void		randomgeneration(void);
525 void		checksummarysize(void);
526 void		checksblock(void);
527 void		growinit(char *);
528 void		checkdev(char *, char  *);
529 void		checkmount(struct mnttab *, char *);
530 struct dinode	*gdinode(ino_t);
531 int		csfraginrange(daddr32_t);
532 struct csfrag	*findcsfrag(daddr32_t, struct csfrag **);
533 void		checkindirect(ino_t, daddr32_t *, daddr32_t, int);
534 void		addcsfrag(ino_t, daddr32_t, struct csfrag **);
535 void		delcsfrag(daddr32_t, struct csfrag **);
536 void		checkdirect(ino_t, daddr32_t *, daddr32_t *, int);
537 void		findcsfragino(void);
538 void		fixindirect(daddr32_t, int);
539 void		fixdirect(caddr_t, daddr32_t, daddr32_t *, int);
540 void		fixcsfragino(void);
541 void		extendsummaryinfo(void);
542 int		notenoughspace(void);
543 void		unalloccsfragino(void);
544 void		unalloccsfragfree(void);
545 void		findcsfragfree(void);
546 void		copycsfragino(void);
547 void		rdcg(long);
548 void		wtcg(void);
549 void		flcg(void);
550 void		allocfrags(long, daddr32_t *, long *);
551 void		alloccsfragino(void);
552 void		alloccsfragfree(void);
553 void		freefrags(daddr32_t, long, long);
554 int		findfreerange(long *, long *);
555 void		resetallocinfo(void);
556 void		extendcg(long);
557 void		ulockfs(void);
558 void		wlockfs(void);
559 void		clockfs(void);
560 void		wtsb(void);
561 static int64_t	checkfragallocated(daddr32_t);
562 static struct csum 	*read_summaryinfo(struct fs *);
563 static diskaddr_t 	probe_summaryinfo();
564 
565 void
566 main(int argc, char *argv[])
567 {
568 	long i, mincpc, mincpg, ibpcl;
569 	long cylno, rpos, blk, j, warn = 0;
570 	long mincpgcnt, maxcpg;
571 	uint64_t used, bpcg, inospercg;
572 	long mapcramped, inodecramped;
573 	long postblsize, rotblsize, totalsbsize;
574 	FILE *mnttab;
575 	struct mnttab mntp;
576 	char *special;
577 	struct statvfs64 fs;
578 	struct dk_cinfo dkcinfo;
579 	char pbuf[sizeof (uint64_t) * 3 + 1];
580 	int width, plen;
581 	uint64_t num;
582 	int c, saverr;
583 	diskaddr_t max_fssize;
584 	long tmpmaxcontig = -1;
585 	struct sigaction sigact;
586 	uint64_t nbytes64;
587 	int remaining_cg;
588 	int do_dot = 0;
589 
590 	(void) setlocale(LC_ALL, "");
591 
592 #if !defined(TEXT_DOMAIN)
593 #define	TEXT_DOMAIN "SYS_TEST"
594 #endif
595 	(void) textdomain(TEXT_DOMAIN);
596 
597 	while ((c = getopt(argc, argv, "F:bmo:VPGM:T:t:")) != EOF) {
598 		switch (c) {
599 
600 		case 'F':
601 			string = optarg;
602 			if (strcmp(string, "ufs") != 0)
603 				usage();
604 			break;
605 
606 		case 'm':	/* return command line used to create this FS */
607 			mflag++;
608 			break;
609 
610 		case 'o':
611 			/*
612 			 * ufs specific options.
613 			 */
614 			string = optarg;
615 			while (*string != '\0') {
616 				if (match("nsect=")) {
617 					nsect = number(DFLNSECT, "nsect", 0);
618 					nsect_flag = RC_KEYWORD;
619 				} else if (match("ntrack=")) {
620 					ntrack = number(DFLNTRAK, "ntrack", 0);
621 					ntrack_flag = RC_KEYWORD;
622 				} else if (match("bsize=")) {
623 					bsize = number(DESBLKSIZE, "bsize", 0);
624 					bsize_flag = RC_KEYWORD;
625 				} else if (match("fragsize=")) {
626 					fragsize = number(DESFRAGSIZE,
627 					    "fragsize", 0);
628 					fragsize_flag = RC_KEYWORD;
629 				} else if (match("cgsize=")) {
630 					cpg = number(DESCPG, "cgsize", 0);
631 					cpg_flag = RC_KEYWORD;
632 				} else if (match("free=")) {
633 					minfree = number(MINFREE, "free",
634 					    ALLOW_PERCENT);
635 					minfree_flag = RC_KEYWORD;
636 				} else if (match("maxcontig=")) {
637 					tmpmaxcontig =
638 					    number(-1, "maxcontig", 0);
639 					maxcontig_flag = RC_KEYWORD;
640 				} else if (match("nrpos=")) {
641 					nrpos = number(NRPOS, "nrpos", 0);
642 					nrpos_flag = RC_KEYWORD;
643 				} else if (match("rps=")) {
644 					rps = number(DEFHZ, "rps", 0);
645 					rps_flag = RC_KEYWORD;
646 				} else if (match("nbpi=")) {
647 					nbpi = number(NBPI, "nbpi", 0);
648 					nbpi_flag = RC_KEYWORD;
649 				} else if (match("opt=")) {
650 					opt = checkopt(string);
651 				} else if (match("mtb=")) {
652 					mtb = checkmtb(string);
653 				} else if (match("apc=")) {
654 					apc = number(0, "apc", 0);
655 					apc_flag = RC_KEYWORD;
656 				} else if (match("gap=")) {
657 					(void) number(0, "gap", ALLOW_MS1);
658 					rotdelay = ROTDELAY;
659 					rotdelay_flag = RC_DEFAULT;
660 				} else if (match("debug=")) {
661 					debug = number(0, "debug", 0);
662 				} else if (match("N")) {
663 					Nflag++;
664 				} else if (*string == '\0') {
665 					break;
666 				} else {
667 					(void) fprintf(stderr, gettext(
668 						"illegal option: %s\n"),
669 						string);
670 					usage();
671 				}
672 
673 				if (*string == ',') string++;
674 				if (*string == ' ') string++;
675 			}
676 			break;
677 
678 		case 'V':
679 			{
680 				char	*opt_text;
681 				int	opt_count;
682 
683 				(void) fprintf(stdout, gettext("mkfs -F ufs "));
684 				for (opt_count = 1; opt_count < argc;
685 								opt_count++) {
686 					opt_text = argv[opt_count];
687 					if (opt_text)
688 					    (void) fprintf(stdout, " %s ",
689 								opt_text);
690 				}
691 				(void) fprintf(stdout, "\n");
692 			}
693 			break;
694 
695 		case 'b':	/* do nothing for this */
696 			break;
697 
698 		case 'M':	/* grow the mounted file system */
699 			directory = optarg;
700 
701 			/* FALLTHROUGH */
702 		case 'G':	/* grow the file system */
703 			grow = 1;
704 			break;
705 		case 'P':	/* probe the file system growing size 	*/
706 			Pflag = 1;
707 			grow = 1; /* probe mode implies fs growing	*/
708 			break;
709 		case 'T':	/* For testing */
710 			testforce = 1;
711 
712 			/* FALLTHROUGH */
713 		case 't':
714 			test = 1;
715 			string = optarg;
716 			testfrags = number(NO_DEFAULT, "testfrags", 0);
717 			break;
718 
719 		case '?':
720 			usage();
721 			break;
722 		}
723 	}
724 #ifdef MKFS_DEBUG
725 	/*
726 	 * Turning on MKFS_DEBUG causes mkfs to produce a filesystem
727 	 * that can be reproduced by setting the time to 0 and seeding
728 	 * the random number generator to a constant.
729 	 */
730 	mkfstime = 0;	/* reproducible results */
731 #else
732 	(void) time(&mkfstime);
733 #endif
734 
735 	if (optind >= (argc - 1)) {
736 		if (optind > (argc - 1)) {
737 			(void) fprintf(stderr,
738 			    gettext("special not specified\n"));
739 			usage();
740 		} else if (mflag == 0) {
741 			(void) fprintf(stderr,
742 			    gettext("size not specified\n"));
743 			usage();
744 		}
745 	}
746 	argc -= optind;
747 	argv = &argv[optind];
748 
749 	fsys = argv[0];
750 	fsi = open64(fsys, O_RDONLY);
751 	if (fsi < 0) {
752 		(void) fprintf(stderr, gettext("%s: cannot open\n"), fsys);
753 		lockexit(32);
754 	}
755 
756 	if (mflag) {
757 		dump_fscmd(fsys, fsi);
758 		lockexit(0);
759 	}
760 
761 	/*
762 	 * The task of setting all of the configuration parameters for a
763 	 * UFS file system is basically a matter of solving n equations
764 	 * in m variables.  Typically, m is greater than n, so there is
765 	 * usually more than one valid solution.  Since this is usually
766 	 * an under-constrained problem, it's not always obvious what the
767 	 * "best" configuration is.
768 	 *
769 	 * In general, the approach is to
770 	 * 1. Determine the values for the file system parameters
771 	 *    that are externally constrained and therefore not adjustable
772 	 *    by mkfs (such as the device's size and maxtransfer size).
773 	 * 2. Acquire the user's requested setting for all configuration
774 	 *    values that can be set on the command line.
775 	 * 3. Determine the final value of all configuration values, by
776 	 *    the following approach:
777 	 *	- set the file system block size (fs_bsize).  Although
778 	 *	  this could be regarded as an adjustable parameter, in
779 	 *	  fact, it's pretty much a constant.  At this time, it's
780 	 *	  generally set to 8k (with older hardware, it can
781 	 *	  sometimes make sense to set it to 4k, but those
782 	 *	  situations are pretty rare now).
783 	 *	- re-adjust the maximum file system size based on the
784 	 *	  value of the file system block size.  Since the
785 	 *	  frag size can't be any larger than a file system
786 	 *	  block, and the number of frags in the file system
787 	 *	  has to fit into 31 bits, the file system block size
788 	 *	  affects the maximum file system size.
789 	 *	- now that the real maximum file system is known, set the
790 	 *	  actual size of the file system to be created to
791 	 *	  MIN(requested size, maximum file system size).
792 	 *	- now validate, and if necessary, adjust the following
793 	 *	  values:
794 	 *		rotdelay
795 	 *		nsect
796 	 *		maxcontig
797 	 *		apc
798 	 *		frag_size
799 	 *		rps
800 	 *		minfree
801 	 *		nrpos
802 	 *		ntrack
803 	 *		nbpi
804 	 *	- calculate maxcpg (the maximum value of the cylinders-per-
805 	 *	  cylinder-group configuration parameter).  There are two
806 	 *	  algorithms for calculating maxcpg:  an old one, which is
807 	 *	  used for file systems of less than 1 terabyte, and a
808 	 *	  new one, implemented in the function compute_maxcpg(),
809 	 *	  which is used for file systems of greater than 1 TB.
810 	 *	  The difference between them is that compute_maxcpg()
811 	 *	  really tries to maximize the cpg value.  The old
812 	 *	  algorithm fails to take advantage of smaller frags and
813 	 *	  lower inode density when determining the maximum cpg,
814 	 *	  and thus comes up with much lower numbers in some
815 	 *	  configurations.  At some point, we might use the
816 	 *	  new algorithm for determining maxcpg for all file
817 	 *	  systems, but at this time, the changes implemented for
818 	 *	  multi-terabyte UFS are NOT being automatically applied
819 	 *	  to UFS file systems of less than a terabyte (in the
820 	 *	  interest of not changing existing UFS policy too much
821 	 *	  until the ramifications of the changes are well-understood
822 	 *	  and have been evaluated for their effects on performance.)
823 	 *	- check the current values of the configuration parameters
824 	 *	  against the various constraints imposed by UFS.  These
825 	 *	  include:
826 	 *		* There must be at least one inode in each
827 	 *		  cylinder group.
828 	 *		* The cylinder group overhead block, which
829 	 *		  contains the inode and frag bitmaps, must fit
830 	 *		  within one file system block.
831 	 *		* The space required for inode maps should
832 	 *		  occupy no more than a third of the cylinder
833 	 *		  group overhead block.
834 	 *		* The rotational position tables have to fit
835 	 *		  within the available space in the super block.
836 	 *	  Adjust the configuration values that can be adjusted
837 	 *	  so that these constraints are satisfied.  The
838 	 *	  configuration values that are adjustable are:
839 	 *		* frag size
840 	 *		* cylinders per group
841 	 *		* inode density (can be increased)
842 	 *		* number of rotational positions (the rotational
843 	 *		  position tables are eliminated altogether if
844 	 *		  there isn't enough room for them.)
845 	 * 4. Set the values for all the dependent configuration
846 	 *    values (those that aren't settable on the command
847 	 *    line and which are completely dependent on the
848 	 *    adjustable parameters).  These include cpc (cycles
849 	 *    per cylinder), spc (sectors-per-cylinder), and many others.
850 	 */
851 
852 	max_fssize = get_max_size(fsi);
853 
854 	/*
855 	 * Get and check positional arguments, if any.
856 	 */
857 	switch (argc - 1) {
858 	default:
859 		usage();
860 		/*NOTREACHED*/
861 	case 15:
862 		mtb = checkmtb(argv[15]);
863 		/* FALLTHROUGH */
864 	case 14:
865 		string = argv[14];
866 		tmpmaxcontig = number(-1, "maxcontig", 0);
867 		maxcontig_flag = RC_POSITIONAL;
868 		/* FALLTHROUGH */
869 	case 13:
870 		string = argv[13];
871 		nrpos = number(NRPOS, "nrpos", 0);
872 		nrpos_flag = RC_POSITIONAL;
873 		/* FALLTHROUGH */
874 	case 12:
875 		string = argv[12];
876 		rotdelay = ROTDELAY;
877 		rotdelay_flag = RC_DEFAULT;
878 		/* FALLTHROUGH */
879 	case 11:
880 		string = argv[11];
881 		apc = number(0, "apc", 0);
882 		apc_flag = RC_POSITIONAL;
883 		/* FALLTHROUGH */
884 	case 10:
885 		opt = checkopt(argv[10]);
886 		/* FALLTHROUGH */
887 	case 9:
888 		string = argv[9];
889 		nbpi = number(NBPI, "nbpi", 0);
890 		nbpi_flag = RC_POSITIONAL;
891 		/* FALLTHROUGH */
892 	case 8:
893 		string = argv[8];
894 		rps = number(DEFHZ, "rps", 0);
895 		rps_flag = RC_POSITIONAL;
896 		/* FALLTHROUGH */
897 	case 7:
898 		string = argv[7];
899 		minfree = number(MINFREE, "free", ALLOW_PERCENT);
900 		minfree_flag = RC_POSITIONAL;
901 		/* FALLTHROUGH */
902 	case 6:
903 		string = argv[6];
904 		cpg = number(DESCPG, "cgsize", 0);
905 		cpg_flag = RC_POSITIONAL;
906 		/* FALLTHROUGH */
907 	case 5:
908 		string = argv[5];
909 		fragsize = number(DESFRAGSIZE, "fragsize", 0);
910 		fragsize_flag = RC_POSITIONAL;
911 		/* FALLTHROUGH */
912 	case 4:
913 		string = argv[4];
914 		bsize = number(DESBLKSIZE, "bsize", 0);
915 		bsize_flag = RC_POSITIONAL;
916 		/* FALLTHROUGH */
917 	case 3:
918 		string = argv[3];
919 		ntrack = number(DFLNTRAK, "ntrack", 0);
920 		ntrack_flag = RC_POSITIONAL;
921 		/* FALLTHROUGH */
922 	case 2:
923 		string = argv[2];
924 		nsect = number(DFLNSECT, "nsect", 0);
925 		nsect_flag = RC_POSITIONAL;
926 		/* FALLTHROUGH */
927 	case 1:
928 		string = argv[1];
929 		fssize_db = number(max_fssize, "size", 0);
930 	}
931 
932 
933 	if ((maxcontig_flag == RC_DEFAULT) || (tmpmaxcontig == -1) ||
934 		(maxcontig == -1)) {
935 		long maxtrax = get_max_track_size(fsi);
936 		maxcontig = maxtrax / bsize;
937 
938 	} else {
939 		maxcontig = tmpmaxcontig;
940 	}
941 
942 	if (rotdelay == -1) {	/* default by newfs and mkfs */
943 		rotdelay = ROTDELAY;
944 	}
945 
946 	if (cpg_flag == RC_DEFAULT) { /* If not explicity set, use default */
947 		cpg = DESCPG;
948 	}
949 
950 	/*
951 	 * Now that we have the semi-sane args, either positional, via -o,
952 	 * or by defaulting, handle inter-dependencies and range checks.
953 	 */
954 
955 	/*
956 	 * Settle the file system block size first, since it's a fixed
957 	 * parameter once set and so many other parameters, including
958 	 * max_fssize, depend on it.
959 	 */
960 	range_check(&bsize, "bsize", MINBSIZE, MAXBSIZE, DESBLKSIZE,
961 	    bsize_flag);
962 
963 	if (!POWEROF2(bsize)) {
964 		(void) fprintf(stderr,
965 		    gettext("block size must be a power of 2, not %ld\n"),
966 		    bsize);
967 		bsize = DESBLKSIZE;
968 		(void) fprintf(stderr,
969 		    gettext("mkfs: bsize reset to default %ld\n"),
970 		    bsize);
971 	}
972 
973 	if (fssize_db > max_fssize && validate_size(fsi, fssize_db)) {
974 		(void) fprintf(stderr, gettext(
975 		    "Warning: the requested size of this file system\n"
976 		    "(%lld sectors) is greater than the size of the\n"
977 		    "device reported by the driver (%lld sectors).\n"
978 		    "However, a read of the device at the requested size\n"
979 		    "does succeed, so the requested size will be used.\n"),
980 		    fssize_db, max_fssize);
981 		max_fssize = fssize_db;
982 	}
983 	/*
984 	 * Since the maximum allocatable unit (the frag) must be less than
985 	 * or equal to bsize, and the number of frags must be less than or
986 	 * equal to INT_MAX, the total size of the file system (in
987 	 * bytes) must be less than or equal to bsize * INT_MAX.
988 	 */
989 
990 	if (max_fssize > ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX)
991 		max_fssize = ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX;
992 	range_check_64(&fssize_db, "size", 1024LL, max_fssize, max_fssize, 1);
993 
994 	if (fssize_db >= SECTORS_PER_TERABYTE) {
995 		mtb = 'y';
996 		if (!in_64bit_mode()) {
997 			(void) fprintf(stderr, gettext(
998 "mkfs:  Warning: Creating a file system greater than 1 terabyte on a\n"
999 "       system running a 32-bit kernel.  This file system will not be\n"
1000 "       accessible until the system is rebooted with a 64-bit kernel.\n"));
1001 		}
1002 	}
1003 
1004 
1005 	/*
1006 	 * 32K based on max block size of 64K, and rotational layout
1007 	 * test of nsect <= (256 * sectors/block).  Current block size
1008 	 * limit is not 64K, but it's growing soon.
1009 	 */
1010 	range_check(&nsect, "nsect", 1, 32768, DFLNSECT, nsect_flag);
1011 	range_check(&apc, "apc", 0, nsect - 1, 0, apc_flag);
1012 
1013 	if (mtb == 'y')
1014 		fragsize = bsize;
1015 
1016 	range_check(&fragsize, "fragsize", sectorsize, bsize,
1017 	    MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize)), fragsize_flag);
1018 
1019 	if ((bsize / MAXFRAG) > fragsize) {
1020 		(void) fprintf(stderr, gettext(
1021 "fragment size %ld is too small, minimum with block size %ld is %ld\n"),
1022 		    fragsize, bsize, bsize / MAXFRAG);
1023 		(void) fprintf(stderr,
1024 		    gettext("mkfs: fragsize reset to minimum %ld\n"),
1025 		    bsize / MAXFRAG);
1026 		fragsize = bsize / MAXFRAG;
1027 	}
1028 
1029 	if (!POWEROF2(fragsize)) {
1030 		(void) fprintf(stderr,
1031 		    gettext("fragment size must be a power of 2, not %ld\n"),
1032 		    fragsize);
1033 		fragsize = MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize));
1034 		(void) fprintf(stderr,
1035 		    gettext("mkfs: fragsize reset to %ld\n"),
1036 		    fragsize);
1037 	}
1038 
1039 	/* At this point, bsize must be >= fragsize, so no need to check it */
1040 
1041 	if (bsize < PAGESIZE) {
1042 		(void) fprintf(stderr, gettext(
1043 		    "WARNING: filesystem block size (%ld) is smaller than "
1044 		    "memory page size (%ld).\nResulting filesystem can not be "
1045 		    "mounted on this system.\n\n"),
1046 		    bsize, (long)PAGESIZE);
1047 	}
1048 
1049 	range_check(&rps, "rps", 1, 1000, DEFHZ, rps_flag);
1050 	range_check(&minfree, "free", 0, 99, MINFREE, minfree_flag);
1051 	range_check(&nrpos, "nrpos", 1, nsect, MIN(nsect, NRPOS), nrpos_flag);
1052 
1053 	/*
1054 	 * ntrack is the number of tracks per cylinder.
1055 	 * The ntrack value must be between 1 and the total number of
1056 	 * sectors in the file system.
1057 	 */
1058 	range_check(&ntrack, "ntrack", 1,
1059 	    fssize_db > INT_MAX ? INT_MAX : (uint32_t)fssize_db,
1060 	    DFLNTRAK, ntrack_flag);
1061 
1062 	/*
1063 	 * nbpi is variable, but 2MB seems a reasonable upper limit,
1064 	 * as 4MB tends to cause problems (using otherwise-default
1065 	 * parameters).  The true limit is where we end up with one
1066 	 * inode per cylinder group.  If this file system is being
1067 	 * configured for multi-terabyte access, nbpi must be at least 1MB.
1068 	 */
1069 	if (mtb == 'y' && nbpi < MTB_NBPI) {
1070 		(void) fprintf(stderr, gettext("mkfs: bad value for nbpi: "
1071 			"must be at least 1048576 for multi-terabyte, "
1072 			"nbpi reset to default 1048576\n"));
1073 		nbpi = MTB_NBPI;
1074 	}
1075 
1076 	if (mtb == 'y')
1077 		range_check(&nbpi, "nbpi", MTB_NBPI, 2 * MB, MTB_NBPI,
1078 			nbpi_flag);
1079 	else
1080 		range_check(&nbpi, "nbpi", DEV_BSIZE, 2 * MB, NBPI, nbpi_flag);
1081 
1082 	/*
1083 	 * maxcpg is another variably-limited parameter.  Calculate
1084 	 * the limit based on what we've got for its dependent
1085 	 * variables.  Effectively, it's how much space is left in the
1086 	 * superblock after all the other bits are accounted for.  We
1087 	 * only fill in sblock fields so we can use MAXIpG.
1088 	 *
1089 	 * If the calculation of maxcpg below (for the mtb == 'n'
1090 	 * case) is changed, update newfs as well.
1091 	 *
1092 	 * For old-style, non-MTB format file systems, use the old
1093 	 * algorithm for calculating the maximum cylinder group size,
1094 	 * even though it limits the cylinder group more than necessary.
1095 	 * Since layout can affect performance, we don't want to change
1096 	 * the default layout for non-MTB file systems at this time.
1097 	 * However, for MTB file systems, use the new maxcpg calculation,
1098 	 * which really maxes out the cylinder group size.
1099 	 */
1100 
1101 	sblock.fs_bsize = bsize;
1102 	sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode);
1103 
1104 	if (mtb == 'n') {
1105 		maxcpg = (bsize - sizeof (struct cg) -
1106 		    howmany(MAXIpG(&sblock), NBBY)) /
1107 		    (sizeof (long) + nrpos * sizeof (short) +
1108 		    nsect / (MAXFRAG * NBBY));
1109 	} else {
1110 		maxcpg = compute_maxcpg(bsize, fragsize, nbpi, nrpos,
1111 		    nsect * ntrack);
1112 	}
1113 
1114 	if (cpg == -1)
1115 		cpg = maxcpg;
1116 	/*
1117 	 * mincpg is variable in complex ways, so we really can't
1118 	 * do a sane lower-end limit check at this point.
1119 	 */
1120 	range_check(&cpg, "cgsize", 1, maxcpg, MIN(maxcpg, DESCPG), cpg_flag);
1121 
1122 	/*
1123 	 * get the controller info
1124 	 */
1125 	ismdd = 0;
1126 	islog = 0;
1127 	islogok = 0;
1128 	waslog = 0;
1129 
1130 	if (ioctl(fsi, DKIOCINFO, &dkcinfo) == 0)
1131 		/*
1132 		 * if it is an MDD (disksuite) device
1133 		 */
1134 		if (dkcinfo.dki_ctype == DKC_MD) {
1135 			ismdd++;
1136 			/*
1137 			 * check the logging device
1138 			 */
1139 			if (ioctl(fsi, _FIOISLOG, NULL) == 0) {
1140 				islog++;
1141 				if (ioctl(fsi, _FIOISLOGOK, NULL) == 0)
1142 					islogok++;
1143 			}
1144 		}
1145 
1146 	/*
1147 	 * Do not grow the file system, but print on stdout the maximum
1148 	 * size in sectors to which the file system can be increased.
1149 	 * The calculated size is limited by fssize_db.
1150 	 * Note that we don't lock the filesystem and therefore under rare
1151 	 * conditions (the filesystem is mounted, the free block count is
1152 	 * almost zero, and the superuser is still changing it) the calculated
1153 	 * size can be imprecise.
1154 	 */
1155 	if (Pflag) {
1156 		(void) printf("%llu\n", probe_summaryinfo());
1157 		exit(0);
1158 	}
1159 
1160 	/*
1161 	 * If we're growing an existing filesystem, then we're about
1162 	 * to start doing things that can require recovery efforts if
1163 	 * we get interrupted, so make sure we get a chance to do so.
1164 	 */
1165 	if (grow) {
1166 		sigact.sa_handler = recover_from_sigint;
1167 		sigemptyset(&sigact.sa_mask);
1168 		sigact.sa_flags = SA_RESTART;
1169 
1170 		if (sigaction(SIGINT, &sigact, (struct sigaction *)NULL) < 0) {
1171 			perror(gettext("Could not register SIGINT handler"));
1172 			lockexit(3);
1173 		}
1174 	}
1175 
1176 	if (!Nflag) {
1177 		/*
1178 		 * Check if MNTTAB is trustable
1179 		 */
1180 		if (statvfs64(MNTTAB, &fs) < 0) {
1181 			(void) fprintf(stderr, gettext("can't statvfs %s\n"),
1182 				MNTTAB);
1183 			exit(32);
1184 		}
1185 
1186 		if (strcmp(MNTTYPE_MNTFS, fs.f_basetype) != 0) {
1187 			(void) fprintf(stderr, gettext(
1188 				"%s file system type is not %s, can't mkfs\n"),
1189 				MNTTAB, MNTTYPE_MNTFS);
1190 			exit(32);
1191 		}
1192 
1193 		special = getfullblkname(fsys);
1194 		checkdev(fsys, special);
1195 
1196 		/*
1197 		 * If we found the block device name,
1198 		 * then check the mount table.
1199 		 * if mounted, and growing write lock the file system
1200 		 *
1201 		 */
1202 		if ((special != NULL) && (*special != '\0')) {
1203 			if ((mnttab = fopen(MNTTAB, "r")) == NULL) {
1204 				(void) fprintf(stderr, gettext(
1205 					"can't open %s\n"), MNTTAB);
1206 				exit(32);
1207 			}
1208 			while ((getmntent(mnttab, &mntp)) == NULL) {
1209 				if (grow) {
1210 					checkmount(&mntp, special);
1211 					continue;
1212 				}
1213 				if (strcmp(special, mntp.mnt_special) == 0) {
1214 					(void) fprintf(stderr, gettext(
1215 					    "%s is mounted, can't mkfs\n"),
1216 					    special);
1217 					exit(32);
1218 				}
1219 			}
1220 			(void) fclose(mnttab);
1221 		}
1222 
1223 		if (directory && (ismounted == 0)) {
1224 			(void) fprintf(stderr, gettext("%s is not mounted\n"),
1225 			    special);
1226 			lockexit(32);
1227 		}
1228 
1229 		fso = (grow) ? open64(fsys, O_WRONLY) : creat64(fsys, 0666);
1230 		if (fso < 0) {
1231 			saverr = errno;
1232 			(void) fprintf(stderr,
1233 			    gettext("%s: cannot create: %s\n"),
1234 			    fsys, strerror(saverr));
1235 			lockexit(32);
1236 		}
1237 
1238 	} else {
1239 
1240 		/*
1241 		 * For the -N case, a file descriptor is needed for the llseek()
1242 		 * in wtfs(). See the comment in wtfs() for more information.
1243 		 *
1244 		 * Get a file descriptor that's read-only so that this code
1245 		 * doesn't accidentally write to the file.
1246 		 */
1247 		fso = open64(fsys, O_RDONLY);
1248 		if (fso < 0) {
1249 			saverr = errno;
1250 			(void) fprintf(stderr, gettext("%s: cannot open: %s\n"),
1251 			    fsys, strerror(saverr));
1252 			lockexit(32);
1253 		}
1254 	}
1255 
1256 	/*
1257 	 * seed random # generator (for ic_generation)
1258 	 */
1259 #ifdef MKFS_DEBUG
1260 	srand48(12962);	/* reproducible results */
1261 #else
1262 	srand48((long)(time((time_t *)NULL) + getpid()));
1263 #endif
1264 
1265 	if (grow) {
1266 		growinit(fsys);
1267 		goto grow00;
1268 	}
1269 
1270 	/*
1271 	 * Validate the given file system size.
1272 	 * Verify that its last block can actually be accessed.
1273 	 *
1274 	 * Note: it's ok to use sblock as a buffer because it is immediately
1275 	 * overwritten by the rdfs() of the superblock in the next line.
1276 	 *
1277 	 * ToDo: Because the size checking is done in rdfs()/wtfs(), the
1278 	 * error message for specifying an illegal size is very unfriendly.
1279 	 * In the future, one could replace the rdfs()/wtfs() calls
1280 	 * below with in-line calls to read() or write(). This allows better
1281 	 * error messages to be put in place.
1282 	 */
1283 	rdfs(fssize_db - 1, (int)sectorsize, (char *)&sblock);
1284 
1285 	/*
1286 	 * make the fs unmountable
1287 	 */
1288 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
1289 	sblock.fs_magic = -1;
1290 	sblock.fs_clean = FSBAD;
1291 	sblock.fs_state = FSOKAY - sblock.fs_time;
1292 	wtfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
1293 	bzero(&sblock, (size_t)sbsize);
1294 
1295 	sblock.fs_nsect = nsect;
1296 	sblock.fs_ntrak = ntrack;
1297 
1298 	/*
1299 	 * Validate specified/determined spc
1300 	 * and calculate minimum cylinders per group.
1301 	 */
1302 
1303 	/*
1304 	 * sectors/cyl = tracks/cyl * sectors/track
1305 	 */
1306 	sblock.fs_spc = sblock.fs_ntrak * sblock.fs_nsect;
1307 
1308 grow00:
1309 	if (apc_flag) {
1310 		sblock.fs_spc -= apc;
1311 	}
1312 	/*
1313 	 * Have to test for this separately from apc_flag, due to
1314 	 * the growfs case....
1315 	 */
1316 	if (sblock.fs_spc != sblock.fs_ntrak * sblock.fs_nsect) {
1317 		spc_flag = 1;
1318 	}
1319 	if (grow)
1320 		goto grow10;
1321 
1322 	sblock.fs_nrpos = nrpos;
1323 	sblock.fs_bsize = bsize;
1324 	sblock.fs_fsize = fragsize;
1325 	sblock.fs_minfree = minfree;
1326 
1327 grow10:
1328 	if (nbpi < sblock.fs_fsize) {
1329 		(void) fprintf(stderr, gettext(
1330 		"warning: wasteful data byte allocation / inode (nbpi):\n"));
1331 		(void) fprintf(stderr, gettext(
1332 		    "%ld smaller than allocatable fragment size of %d\n"),
1333 		    nbpi, sblock.fs_fsize);
1334 	}
1335 	if (grow)
1336 		goto grow20;
1337 
1338 	if (opt == 's')
1339 		sblock.fs_optim = FS_OPTSPACE;
1340 	else
1341 		sblock.fs_optim = FS_OPTTIME;
1342 
1343 	sblock.fs_bmask = ~(sblock.fs_bsize - 1);
1344 	sblock.fs_fmask = ~(sblock.fs_fsize - 1);
1345 	/*
1346 	 * Planning now for future expansion.
1347 	 */
1348 #if defined(_BIG_ENDIAN)
1349 		sblock.fs_qbmask.val[0] = 0;
1350 		sblock.fs_qbmask.val[1] = ~sblock.fs_bmask;
1351 		sblock.fs_qfmask.val[0] = 0;
1352 		sblock.fs_qfmask.val[1] = ~sblock.fs_fmask;
1353 #endif
1354 #if defined(_LITTLE_ENDIAN)
1355 		sblock.fs_qbmask.val[0] = ~sblock.fs_bmask;
1356 		sblock.fs_qbmask.val[1] = 0;
1357 		sblock.fs_qfmask.val[0] = ~sblock.fs_fmask;
1358 		sblock.fs_qfmask.val[1] = 0;
1359 #endif
1360 	for (sblock.fs_bshift = 0, i = sblock.fs_bsize; i > 1; i >>= 1)
1361 		sblock.fs_bshift++;
1362 	for (sblock.fs_fshift = 0, i = sblock.fs_fsize; i > 1; i >>= 1)
1363 		sblock.fs_fshift++;
1364 	sblock.fs_frag = numfrags(&sblock, sblock.fs_bsize);
1365 	for (sblock.fs_fragshift = 0, i = sblock.fs_frag; i > 1; i >>= 1)
1366 		sblock.fs_fragshift++;
1367 	if (sblock.fs_frag > MAXFRAG) {
1368 		(void) fprintf(stderr, gettext(
1369 	"fragment size %d is too small, minimum with block size %d is %d\n"),
1370 		    sblock.fs_fsize, sblock.fs_bsize,
1371 		    sblock.fs_bsize / MAXFRAG);
1372 		lockexit(32);
1373 	}
1374 	sblock.fs_nindir = sblock.fs_bsize / sizeof (daddr32_t);
1375 	sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode);
1376 	sblock.fs_nspf = sblock.fs_fsize / sectorsize;
1377 	for (sblock.fs_fsbtodb = 0, i = NSPF(&sblock); i > 1; i >>= 1)
1378 		sblock.fs_fsbtodb++;
1379 
1380 	/*
1381 	 * Compute the super-block, cylinder group, and inode blocks.
1382 	 * Note that these "blkno" are really fragment addresses.
1383 	 * For example, on an 8K/1K (block/fragment) system, fs_sblkno is 16,
1384 	 * fs_cblkno is 24, and fs_iblkno is 32. This is why CGSIZE is so
1385 	 * important: only 1 FS block is allocated for the cg struct (fragment
1386 	 * numbers 24 through 31).
1387 	 */
1388 	sblock.fs_sblkno =
1389 	    roundup(howmany(bbsize + sbsize, sblock.fs_fsize), sblock.fs_frag);
1390 	sblock.fs_cblkno = (daddr32_t)(sblock.fs_sblkno +
1391 	    roundup(howmany(sbsize, sblock.fs_fsize), sblock.fs_frag));
1392 	sblock.fs_iblkno = sblock.fs_cblkno + sblock.fs_frag;
1393 
1394 	sblock.fs_cgoffset = roundup(
1395 	    howmany(sblock.fs_nsect, NSPF(&sblock)), sblock.fs_frag);
1396 	for (sblock.fs_cgmask = -1, i = sblock.fs_ntrak; i > 1; i >>= 1)
1397 		sblock.fs_cgmask <<= 1;
1398 	if (!POWEROF2(sblock.fs_ntrak))
1399 		sblock.fs_cgmask <<= 1;
1400 	/*
1401 	 * Validate specified/determined spc
1402 	 * and calculate minimum cylinders per group.
1403 	 */
1404 
1405 	for (sblock.fs_cpc = NSPB(&sblock), i = sblock.fs_spc;
1406 	    sblock.fs_cpc > 1 && (i & 1) == 0;
1407 	    sblock.fs_cpc >>= 1, i >>= 1)
1408 		/* void */;
1409 	mincpc = sblock.fs_cpc;
1410 
1411 	/* if these calculations are changed, check dump_fscmd also */
1412 	bpcg = (uint64_t)sblock.fs_spc * sectorsize;
1413 	inospercg = (uint64_t)roundup(bpcg / sizeof (struct dinode),
1414 	    INOPB(&sblock));
1415 	if (inospercg > MAXIpG(&sblock))
1416 		inospercg = MAXIpG(&sblock);
1417 	used = (uint64_t)(sblock.fs_iblkno + inospercg /
1418 	    INOPF(&sblock)) * NSPF(&sblock);
1419 	mincpgcnt = (long)howmany((uint64_t)sblock.fs_cgoffset *
1420 	    (~sblock.fs_cgmask) + used, sblock.fs_spc);
1421 	mincpg = roundup(mincpgcnt, mincpc);
1422 	/*
1423 	 * Insure that cylinder group with mincpg has enough space
1424 	 * for block maps
1425 	 */
1426 	sblock.fs_cpg = mincpg;
1427 	sblock.fs_ipg = (int32_t)inospercg;
1428 	mapcramped = 0;
1429 
1430 	/*
1431 	 * Make sure the cg struct fits within the file system block.
1432 	 * Use larger block sizes until it fits
1433 	 */
1434 	while (CGSIZE(&sblock) > sblock.fs_bsize) {
1435 		mapcramped = 1;
1436 		if (sblock.fs_bsize < MAXBSIZE) {
1437 			sblock.fs_bsize <<= 1;
1438 			if ((i & 1) == 0) {
1439 				i >>= 1;
1440 			} else {
1441 				sblock.fs_cpc <<= 1;
1442 				mincpc <<= 1;
1443 				mincpg = roundup(mincpgcnt, mincpc);
1444 				sblock.fs_cpg = mincpg;
1445 			}
1446 			sblock.fs_frag <<= 1;
1447 			sblock.fs_fragshift += 1;
1448 			if (sblock.fs_frag <= MAXFRAG)
1449 				continue;
1450 		}
1451 
1452 		/*
1453 		 * Looped far enough. The fragment is now as large as the
1454 		 * filesystem block!
1455 		 */
1456 		if (sblock.fs_fsize == sblock.fs_bsize) {
1457 			(void) fprintf(stderr, gettext(
1458 		    "There is no block size that can support this disk\n"));
1459 			lockexit(32);
1460 		}
1461 
1462 		/*
1463 		 * Try a larger fragment. Double the fragment size.
1464 		 */
1465 		sblock.fs_frag >>= 1;
1466 		sblock.fs_fragshift -= 1;
1467 		sblock.fs_fsize <<= 1;
1468 		sblock.fs_nspf <<= 1;
1469 	}
1470 	/*
1471 	 * Insure that cylinder group with mincpg has enough space for inodes
1472 	 */
1473 	inodecramped = 0;
1474 	used *= sectorsize;
1475 	nbytes64 = (uint64_t)mincpg * bpcg - used;
1476 	inospercg = (uint64_t)roundup((nbytes64 / nbpi), INOPB(&sblock));
1477 	sblock.fs_ipg = (int32_t)inospercg;
1478 	while (inospercg > MAXIpG(&sblock)) {
1479 		inodecramped = 1;
1480 		if (mincpc == 1 || sblock.fs_frag == 1 ||
1481 		    sblock.fs_bsize == MINBSIZE)
1482 			break;
1483 		nbytes64 = (uint64_t)mincpg * bpcg - used;
1484 		(void) fprintf(stderr,
1485 		    gettext("With a block size of %d %s %lu\n"),
1486 		    sblock.fs_bsize, gettext("minimum bytes per inode is"),
1487 		    (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1));
1488 		sblock.fs_bsize >>= 1;
1489 		sblock.fs_frag >>= 1;
1490 		sblock.fs_fragshift -= 1;
1491 		mincpc >>= 1;
1492 		sblock.fs_cpg = roundup(mincpgcnt, mincpc);
1493 		if (CGSIZE(&sblock) > sblock.fs_bsize) {
1494 			sblock.fs_bsize <<= 1;
1495 			break;
1496 		}
1497 		mincpg = sblock.fs_cpg;
1498 		nbytes64 = (uint64_t)mincpg * bpcg - used;
1499 		inospercg = (uint64_t)roundup((nbytes64 / nbpi),
1500 			INOPB(&sblock));
1501 		sblock.fs_ipg = (int32_t)inospercg;
1502 	}
1503 	if (inodecramped) {
1504 		if (inospercg > MAXIpG(&sblock)) {
1505 			nbytes64 = (uint64_t)mincpg * bpcg - used;
1506 			(void) fprintf(stderr, gettext(
1507 			    "Minimum bytes per inode is %d\n"),
1508 			    (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1));
1509 		} else if (!mapcramped) {
1510 			(void) fprintf(stderr, gettext(
1511 	    "With %ld bytes per inode, minimum cylinders per group is %ld\n"),
1512 			    nbpi, mincpg);
1513 		}
1514 	}
1515 	if (mapcramped) {
1516 		(void) fprintf(stderr, gettext(
1517 		    "With %d sectors per cylinder, minimum cylinders "
1518 		    "per group is %ld\n"),
1519 		    sblock.fs_spc, mincpg);
1520 	}
1521 	if (inodecramped || mapcramped) {
1522 		/*
1523 		 * To make this at least somewhat comprehensible in
1524 		 * the world of i18n, figure out what we're going to
1525 		 * say and then say it all at one time.  The days of
1526 		 * needing to scrimp on string space are behind us....
1527 		 */
1528 		if ((sblock.fs_bsize != bsize) &&
1529 		    (sblock.fs_fsize != fragsize)) {
1530 			(void) fprintf(stderr, gettext(
1531 	    "This requires the block size to be changed from %ld to %d\n"
1532 	    "and the fragment size to be changed from %ld to %d\n"),
1533 			    bsize, sblock.fs_bsize,
1534 			    fragsize, sblock.fs_fsize);
1535 		} else if (sblock.fs_bsize != bsize) {
1536 			(void) fprintf(stderr, gettext(
1537 	    "This requires the block size to be changed from %ld to %d\n"),
1538 			    bsize, sblock.fs_bsize);
1539 		} else if (sblock.fs_fsize != fragsize) {
1540 			(void) fprintf(stderr, gettext(
1541 	    "This requires the fragment size to be changed from %ld to %d\n"),
1542 			    fragsize, sblock.fs_fsize);
1543 		} else {
1544 			(void) fprintf(stderr, gettext(
1545 	    "Unable to make filesystem fit with the given constraints\n"));
1546 		}
1547 		(void) fprintf(stderr, gettext(
1548 		    "Please re-run mkfs with corrected parameters\n"));
1549 		lockexit(32);
1550 	}
1551 	/*
1552 	 * Calculate the number of cylinders per group
1553 	 */
1554 	sblock.fs_cpg = cpg;
1555 	if (sblock.fs_cpg % mincpc != 0) {
1556 		(void) fprintf(stderr, gettext(
1557 		    "Warning: cylinder groups must have a multiple "
1558 		    "of %ld cylinders with the given\n         parameters\n"),
1559 		    mincpc);
1560 		sblock.fs_cpg = roundup(sblock.fs_cpg, mincpc);
1561 		(void) fprintf(stderr, gettext("Rounded cgsize up to %d\n"),
1562 		    sblock.fs_cpg);
1563 	}
1564 	/*
1565 	 * Must insure there is enough space for inodes
1566 	 */
1567 	/* if these calculations are changed, check dump_fscmd also */
1568 	nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1569 	sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi), INOPB(&sblock));
1570 
1571 	/*
1572 	 * Slim down cylinders per group, until the inodes can fit.
1573 	 */
1574 	while (sblock.fs_ipg > MAXIpG(&sblock)) {
1575 		inodecramped = 1;
1576 		sblock.fs_cpg -= mincpc;
1577 		nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1578 		sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi),
1579 			INOPB(&sblock));
1580 	}
1581 	/*
1582 	 * Must insure there is enough space to hold block map.
1583 	 * Cut down on cylinders per group, until the cg struct fits in a
1584 	 * filesystem block.
1585 	 */
1586 	while (CGSIZE(&sblock) > sblock.fs_bsize) {
1587 		mapcramped = 1;
1588 		sblock.fs_cpg -= mincpc;
1589 		nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
1590 		sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi),
1591 			INOPB(&sblock));
1592 	}
1593 	sblock.fs_fpg = (sblock.fs_cpg * sblock.fs_spc) / NSPF(&sblock);
1594 	if ((sblock.fs_cpg * sblock.fs_spc) % NSPB(&sblock) != 0) {
1595 		(void) fprintf(stderr,
1596 		gettext("newfs: panic (fs_cpg * fs_spc) %% NSPF != 0\n"));
1597 		lockexit(32);
1598 	}
1599 	if (sblock.fs_cpg < mincpg) {
1600 		(void) fprintf(stderr, gettext(
1601 "With the given parameters, cgsize must be at least %ld; please re-run mkfs\n"),
1602 			mincpg);
1603 		lockexit(32);
1604 	}
1605 	sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock));
1606 grow20:
1607 	/*
1608 	 * Now have size for file system and nsect and ntrak.
1609 	 * Determine number of cylinders and blocks in the file system.
1610 	 */
1611 	fssize_frag = (int64_t)dbtofsb(&sblock, fssize_db);
1612 	if (fssize_frag > INT_MAX) {
1613 		(void) fprintf(stderr, gettext(
1614 "There are too many fragments in the system, increase fragment size\n"),
1615 		    mincpg);
1616 		lockexit(32);
1617 	}
1618 	sblock.fs_size = (int32_t)fssize_frag;
1619 	sblock.fs_ncyl = (int32_t)(fssize_frag * NSPF(&sblock) / sblock.fs_spc);
1620 	if (fssize_frag * NSPF(&sblock) >
1621 	    (uint64_t)sblock.fs_ncyl * sblock.fs_spc) {
1622 		sblock.fs_ncyl++;
1623 		warn = 1;
1624 	}
1625 	if (sblock.fs_ncyl < 1) {
1626 		(void) fprintf(stderr, gettext(
1627 			"file systems must have at least one cylinder\n"));
1628 		lockexit(32);
1629 	}
1630 	if (grow)
1631 		goto grow30;
1632 	/*
1633 	 * Determine feasibility/values of rotational layout tables.
1634 	 *
1635 	 * The size of the rotational layout tables is limited by the size
1636 	 * of the file system block, fs_bsize.  The amount of space
1637 	 * available for tables is calculated as (fs_bsize - sizeof (struct
1638 	 * fs)).  The size of these tables is inversely proportional to the
1639 	 * block size of the file system. The size increases if sectors per
1640 	 * track are not powers of two, because more cylinders must be
1641 	 * described by the tables before the rotational pattern repeats
1642 	 * (fs_cpc).
1643 	 */
1644 	sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT;
1645 	sblock.fs_sbsize = fragroundup(&sblock, sizeof (struct fs));
1646 	sblock.fs_npsect = sblock.fs_nsect;
1647 	if (sblock.fs_ntrak == 1) {
1648 		sblock.fs_cpc = 0;
1649 		goto next;
1650 	}
1651 	postblsize = sblock.fs_nrpos * sblock.fs_cpc * sizeof (short);
1652 	rotblsize = sblock.fs_cpc * sblock.fs_spc / NSPB(&sblock);
1653 	totalsbsize = sizeof (struct fs) + rotblsize;
1654 
1655 	/* do static allocation if nrpos == 8 and fs_cpc <= 16 */
1656 	if (sblock.fs_nrpos == 8 && sblock.fs_cpc <= 16) {
1657 		/* use old static table space */
1658 		sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) -
1659 		    (char *)(&sblock.fs_link);
1660 		sblock.fs_rotbloff = &sblock.fs_space[0] -
1661 		    (uchar_t *)(&sblock.fs_link);
1662 	} else {
1663 		/* use 4.3 dynamic table space */
1664 		sblock.fs_postbloff = &sblock.fs_space[0] -
1665 		    (uchar_t *)(&sblock.fs_link);
1666 		sblock.fs_rotbloff = sblock.fs_postbloff + postblsize;
1667 		totalsbsize += postblsize;
1668 	}
1669 	if (totalsbsize > sblock.fs_bsize ||
1670 	    sblock.fs_nsect > (1 << NBBY) * NSPB(&sblock)) {
1671 		(void) fprintf(stderr, gettext(
1672 		    "Warning: insufficient space in super block for\n"
1673 		    "rotational layout tables with nsect %d, ntrack %d, "
1674 		    "and nrpos %d.\nOmitting tables - file system "
1675 		    "performance may be impaired.\n"),
1676 		    sblock.fs_nsect, sblock.fs_ntrak, sblock.fs_nrpos);
1677 
1678 		/*
1679 		 * Setting fs_cpc to 0 tells alloccgblk() in ufs_alloc.c to
1680 		 * ignore the positional layout table and rotational
1681 		 * position table.
1682 		 */
1683 		sblock.fs_cpc = 0;
1684 		goto next;
1685 	}
1686 	sblock.fs_sbsize = fragroundup(&sblock, totalsbsize);
1687 
1688 
1689 	/*
1690 	 * calculate the available blocks for each rotational position
1691 	 */
1692 	for (cylno = 0; cylno < sblock.fs_cpc; cylno++)
1693 		for (rpos = 0; rpos < sblock.fs_nrpos; rpos++)
1694 			fs_postbl(&sblock, cylno)[rpos] = -1;
1695 	for (i = (rotblsize - 1) * sblock.fs_frag;
1696 	    i >= 0; i -= sblock.fs_frag) {
1697 		cylno = cbtocylno(&sblock, i);
1698 		rpos = cbtorpos(&sblock, i);
1699 		blk = fragstoblks(&sblock, i);
1700 		if (fs_postbl(&sblock, cylno)[rpos] == -1)
1701 			fs_rotbl(&sblock)[blk] = 0;
1702 		else
1703 			fs_rotbl(&sblock)[blk] =
1704 			    fs_postbl(&sblock, cylno)[rpos] - blk;
1705 		fs_postbl(&sblock, cylno)[rpos] = blk;
1706 	}
1707 next:
1708 grow30:
1709 	/*
1710 	 * Compute/validate number of cylinder groups.
1711 	 * Note that if an excessively large filesystem is specified
1712 	 * (e.g., more than 16384 cylinders for an 8K filesystem block), it
1713 	 * does not get detected until checksummarysize()
1714 	 */
1715 	sblock.fs_ncg = sblock.fs_ncyl / sblock.fs_cpg;
1716 	if (sblock.fs_ncyl % sblock.fs_cpg)
1717 		sblock.fs_ncg++;
1718 	sblock.fs_dblkno = sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock);
1719 	i = MIN(~sblock.fs_cgmask, sblock.fs_ncg - 1);
1720 	ibpcl = cgdmin(&sblock, i) - cgbase(&sblock, i);
1721 	if (ibpcl >= sblock.fs_fpg) {
1722 		(void) fprintf(stderr, gettext(
1723 		    "inode blocks/cyl group (%d) >= data blocks (%d)\n"),
1724 		    cgdmin(&sblock, i) - cgbase(&sblock, i) / sblock.fs_frag,
1725 		    sblock.fs_fpg / sblock.fs_frag);
1726 		if ((ibpcl < 0) || (sblock.fs_fpg < 0)) {
1727 			(void) fprintf(stderr, gettext(
1728 	    "number of cylinders per cylinder group (%d) must be decreased.\n"),
1729 			    sblock.fs_cpg);
1730 		} else {
1731 			(void) fprintf(stderr, gettext(
1732 	    "number of cylinders per cylinder group (%d) must be increased.\n"),
1733 			    sblock.fs_cpg);
1734 		}
1735 		(void) fprintf(stderr, gettext(
1736 "Note that cgsize may have been adjusted to allow struct cg to fit.\n"));
1737 		lockexit(32);
1738 	}
1739 	j = sblock.fs_ncg - 1;
1740 	if ((i = fssize_frag - j * sblock.fs_fpg) < sblock.fs_fpg &&
1741 	    cgdmin(&sblock, j) - cgbase(&sblock, j) > i) {
1742 		(void) fprintf(stderr, gettext(
1743 		    "Warning: inode blocks/cyl group (%d) >= data "
1744 		    "blocks (%ld) in last\n    cylinder group. This "
1745 		    "implies %ld sector(s) cannot be allocated.\n"),
1746 		    (cgdmin(&sblock, j) - cgbase(&sblock, j)) / sblock.fs_frag,
1747 		    i / sblock.fs_frag, i * NSPF(&sblock));
1748 		sblock.fs_ncg--;
1749 		sblock.fs_ncyl -= sblock.fs_ncyl % sblock.fs_cpg;
1750 		sblock.fs_size = fssize_frag =
1751 		    (int64_t)sblock.fs_ncyl * (int64_t)sblock.fs_spc /
1752 		    (int64_t)NSPF(&sblock);
1753 		warn = 0;
1754 	}
1755 	if (warn && !spc_flag) {
1756 		(void) fprintf(stderr, gettext(
1757 		    "Warning: %d sector(s) in last cylinder unallocated\n"),
1758 		    sblock.fs_spc - (uint32_t)(fssize_frag * NSPF(&sblock) -
1759 		    (uint64_t)(sblock.fs_ncyl - 1) * sblock.fs_spc));
1760 	}
1761 	/*
1762 	 * fill in remaining fields of the super block
1763 	 */
1764 
1765 	/*
1766 	 * The csum records are stored in cylinder group 0, starting at
1767 	 * cgdmin, the first data block.
1768 	 */
1769 	sblock.fs_csaddr = cgdmin(&sblock, 0);
1770 	sblock.fs_cssize =
1771 	    fragroundup(&sblock, sblock.fs_ncg * sizeof (struct csum));
1772 	i = sblock.fs_bsize / sizeof (struct csum);
1773 	sblock.fs_csmask = ~(i - 1);
1774 	for (sblock.fs_csshift = 0; i > 1; i >>= 1)
1775 		sblock.fs_csshift++;
1776 	fscs = (struct csum *)calloc(1, sblock.fs_cssize);
1777 
1778 	checksummarysize();
1779 	if (mtb == 'y') {
1780 		sblock.fs_magic = MTB_UFS_MAGIC;
1781 		sblock.fs_version = MTB_UFS_VERSION_1;
1782 	} else {
1783 		sblock.fs_magic = FS_MAGIC;
1784 	}
1785 
1786 	if (grow) {
1787 		bcopy((caddr_t)grow_fscs, (caddr_t)fscs, (int)grow_fs_cssize);
1788 		extendsummaryinfo();
1789 		goto grow40;
1790 	}
1791 	sblock.fs_rotdelay = rotdelay;
1792 	sblock.fs_maxcontig = maxcontig;
1793 	sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize);
1794 
1795 	sblock.fs_rps = rps;
1796 	sblock.fs_cgrotor = 0;
1797 	sblock.fs_cstotal.cs_ndir = 0;
1798 	sblock.fs_cstotal.cs_nbfree = 0;
1799 	sblock.fs_cstotal.cs_nifree = 0;
1800 	sblock.fs_cstotal.cs_nffree = 0;
1801 	sblock.fs_fmod = 0;
1802 	sblock.fs_ronly = 0;
1803 	sblock.fs_time = mkfstime;
1804 	sblock.fs_state = FSOKAY - sblock.fs_time;
1805 	sblock.fs_clean = FSCLEAN;
1806 grow40:
1807 
1808 	/*
1809 	 * Dump out summary information about file system.
1810 	 */
1811 	(void) fprintf(stderr, gettext(
1812 	    "%s:\t%lld sectors in %d cylinders of %d tracks, %d sectors\n"),
1813 	    fsys, (uint64_t)sblock.fs_size * NSPF(&sblock), sblock.fs_ncyl,
1814 	    sblock.fs_ntrak, sblock.fs_nsect);
1815 	(void) fprintf(stderr, gettext(
1816 	    "\t%.1fMB in %d cyl groups (%d c/g, %.2fMB/g, %d i/g)\n"),
1817 	    (float)sblock.fs_size * sblock.fs_fsize / MB, sblock.fs_ncg,
1818 	    sblock.fs_cpg, (float)sblock.fs_fpg * sblock.fs_fsize / MB,
1819 	    sblock.fs_ipg);
1820 	/*
1821 	 * Now build the cylinders group blocks and
1822 	 * then print out indices of cylinder groups.
1823 	 */
1824 	(void) fprintf(stderr, gettext(
1825 	    "super-block backups (for fsck -F ufs -o b=#) at:\n"));
1826 	for (width = cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++) {
1827 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1828 			initcg(cylno);
1829 		num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno));
1830 		(void) sprintf(pbuf, " %llu,", num);
1831 		plen = strlen(pbuf);
1832 		if ((width + plen) > (WIDTH - 1)) {
1833 			width = plen;
1834 			(void) fprintf(stderr, "\n");
1835 		} else {
1836 			width += plen;
1837 		}
1838 		(void) fprintf(stderr, "%s", pbuf);
1839 	}
1840 	(void) fprintf(stderr, "\n");
1841 
1842 	remaining_cg = sblock.fs_ncg - cylno;
1843 
1844 	/*
1845 	 * If there are more than 300 cylinder groups still to be
1846 	 * initialized, print a "." for every 50 cylinder groups.
1847 	 */
1848 	if (remaining_cg > 300) {
1849 		(void) fprintf(stderr, gettext(
1850 		    "Initializing cylinder groups:\n"));
1851 		do_dot = 1;
1852 	}
1853 
1854 	/*
1855 	 * Now initialize all cylinder groups between the first ten
1856 	 * and the last ten.
1857 	 *
1858 	 * If the number of cylinder groups was less than 10, all of the
1859 	 * cylinder group offsets would have printed in the last loop
1860 	 * and cylno will already be equal to sblock.fs_ncg and so this
	 * loop will not be entered.  If there are 20 or fewer cylinder
	 * groups, cylno is already greater than or equal to fs_ncg - 10,
	 * so this loop won't be entered in that case either.
1864 	 */
1865 
1866 	i = 0;
1867 	for (; cylno < sblock.fs_ncg - 10; cylno++) {
1868 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1869 			initcg(cylno);
1870 		if (do_dot && cylno % 50 == 0) {
1871 			(void) fprintf(stderr, ".");
1872 			i++;
1873 			if (i == WIDTH - 1) {
1874 				(void) fprintf(stderr, "\n");
1875 				i = 0;
1876 			}
1877 		}
1878 	}
1879 
1880 	/*
1881 	 * Now print the cylinder group offsets for the last 10
1882 	 * cylinder groups, if any are left.
1883 	 */
1884 
1885 	if (do_dot) {
1886 		(void) fprintf(stderr, gettext(
1887 	    "\nsuper-block backups for last 10 cylinder groups at:\n"));
1888 	}
1889 	for (width = 0; cylno < sblock.fs_ncg; cylno++) {
1890 		if ((grow == 0) || (cylno >= grow_fs_ncg))
1891 			initcg(cylno);
1892 		num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno));
1893 		(void) sprintf(pbuf, " %llu,", num);
1894 		plen = strlen(pbuf);
1895 		if ((width + plen) > (WIDTH - 1)) {
1896 			width = plen;
1897 			(void) fprintf(stderr, "\n");
1898 		} else {
1899 			width += plen;
1900 		}
1901 		(void) fprintf(stderr, "%s", pbuf);
1902 	}
1903 	(void) fprintf(stderr, "\n");
1904 	if (Nflag)
1905 		lockexit(0);
1906 	if (grow)
1907 		goto grow50;
1908 
1909 	/*
1910 	 * Now construct the initial file system,
1911 	 * then write out the super-block.
1912 	 */
1913 	fsinit();
1914 grow50:
1915 	/*
1916 	 * write the superblock and csum information
1917 	 */
1918 	wtsb();
1919 
1920 	/*
1921 	 * extend the last cylinder group in the original file system
1922 	 */
1923 	if (grow) {
1924 		extendcg(grow_fs_ncg-1);
1925 		wtsb();
1926 	}
1927 
1928 	/*
1929 	 * Write out the duplicate super blocks to the first 10
1930 	 * cylinder groups (or fewer, if there are fewer than 10
1931 	 * cylinder groups).
1932 	 */
1933 	for (cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++)
1934 		awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)),
1935 		    (int)sbsize, (char *)&sblock, SAVE);
1936 
1937 	/*
1938 	 * Now write out duplicate super blocks to the remaining
1939 	 * cylinder groups.  In the case of multi-terabyte file
1940 	 * systems, just write out the super block to the last ten
1941 	 * cylinder groups (or however many are left).
1942 	 */
1943 	if (mtb == 'y') {
1944 		if (sblock.fs_ncg <= 10)
1945 			cylno = sblock.fs_ncg;
1946 		else if (sblock.fs_ncg <= 20)
1947 			cylno = 10;
1948 		else
1949 			cylno = sblock.fs_ncg - 10;
1950 	}
1951 
1952 	for (; cylno < sblock.fs_ncg; cylno++)
1953 		awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)),
1954 		    (int)sbsize, (char *)&sblock, SAVE);
1955 
1956 	/*
1957 	 * Flush out all the AIO writes we've done.  It's not
1958 	 * necessary to do this explicitly, but it's the only
1959 	 * way to report any errors from those writes.
1960 	 */
1961 	flush_writes();
1962 
1963 	/*
1964 	 * set clean flag
1965 	 */
1966 	if (grow)
1967 		sblock.fs_clean = grow_fs_clean;
1968 	else
1969 		sblock.fs_clean = FSCLEAN;
1970 	sblock.fs_time = mkfstime;
1971 	sblock.fs_state = FSOKAY - sblock.fs_time;
1972 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
1973 	isbad = 0;
1974 
1975 	if (ismdd && islog && !islogok)
1976 		(void) ioctl(fso, _FIOLOGRESET, NULL);
1977 
1978 	if (fsync(fso) == -1) {
1979 		saverr = errno;
1980 		(void) fprintf(stderr,
1981 		    gettext("mkfs: fsync failed on write disk: %s\n"),
1982 		    strerror(saverr));
1983 		/* we're just cleaning up, so keep going */
1984 	}
1985 	if (close(fsi) == -1) {
1986 		saverr = errno;
1987 		(void) fprintf(stderr,
1988 		    gettext("mkfs: close failed on read disk: %s\n"),
1989 		    strerror(saverr));
1990 		/* we're just cleaning up, so keep going */
1991 	}
1992 	if (close(fso) == -1) {
1993 		saverr = errno;
1994 		(void) fprintf(stderr,
1995 		    gettext("mkfs: close failed on write disk: %s\n"),
1996 		    strerror(saverr));
1997 		/* we're just cleaning up, so keep going */
1998 	}
1999 	fsi = fso = -1;
2000 
2001 #ifndef STANDALONE
2002 	lockexit(0);
2003 #endif
2004 }
2005 
2006 /*
2007  * Figure out how big the partition we're dealing with is.
2008  * The value returned is in disk blocks (sectors);
2009  */
2010 static diskaddr_t
2011 get_max_size(int fd)
2012 {
2013 	struct vtoc vtoc;
2014 	dk_gpt_t *efi_vtoc;
2015 	int	is_efi = 0;
2016 	diskaddr_t	slicesize;
2017 
2018 	int index = read_vtoc(fd, &vtoc);
2019 
2020 	if (index < 0) {
2021 		if (index == VT_ENOTSUP || index == VT_ERROR) {
2022 			/* it might be an EFI label */
2023 			is_efi = 1;
2024 			index = efi_alloc_and_read(fd, &efi_vtoc);
2025 		}
2026 	}
2027 
2028 	if (index < 0) {
2029 		switch (index) {
2030 		case VT_ERROR:
2031 			break;
2032 		case VT_EIO:
2033 			errno = EIO;
2034 			break;
2035 		case VT_EINVAL:
2036 			errno = EINVAL;
2037 		}
2038 		perror(gettext("Can not determine partition size"));
2039 		lockexit(32);
2040 	}
2041 
2042 	if (is_efi) {
2043 		slicesize = efi_vtoc->efi_parts[index].p_size;
2044 		efi_free(efi_vtoc);
2045 	} else {
2046 		/*
2047 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
2048 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
2049 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
2050 		 * psize to an unsigned 32-bit quantity, it will be copied
2051 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
2052 		 * sign extension.
2053 		 */
2054 
2055 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
2056 	}
2057 
2058 	if (debug) {
2059 		(void) fprintf(stderr,
2060 		    "get_max_size: index = %d, p_size = %lld, dolimit = %d\n",
2061 		    index, slicesize, (slicesize > FS_MAX));
2062 	}
2063 
2064 	/*
2065 	 * The next line limits a UFS file system to the maximum
2066 	 * supported size.
2067 	 */
2068 
2069 	if (slicesize > FS_MAX)
2070 		return (FS_MAX);
2071 	return (slicesize);
2072 }
2073 
2074 static long
2075 get_max_track_size(int fd)
2076 {
2077 	struct dk_cinfo ci;
2078 	long track_size = -1;
2079 
2080 	if (ioctl(fd, DKIOCINFO, &ci) == 0) {
2081 		track_size = ci.dki_maxtransfer * DEV_BSIZE;
2082 	}
2083 
2084 	if ((track_size < 0)) {
2085 		int	error = 0;
2086 		int	maxphys;
2087 		int	gotit = 0;
2088 
2089 		gotit = fsgetmaxphys(&maxphys, &error);
2090 		if (gotit) {
2091 			track_size = MIN(MB, maxphys);
2092 		} else {
2093 			(void) fprintf(stderr, gettext(
2094 "Warning: Could not get system value for maxphys. The value for\n"
2095 "maxcontig will default to 1MB.\n"));
2096 			track_size = MB;
2097 		}
2098 	}
2099 	return (track_size);
2100 }
2101 
/*
 * Initialize a cylinder group.
 *
 * Builds the cylinder group block for group 'cylno' in a buffer
 * obtained from getbuf(), fills a second buffer with randomized
 * inodes for the group, and queues both for writing with awtfs().
 * Both buffers are handed to awtfs() with RELEASE, so they go back
 * to their buffer pools once the writes have been reaped.
 *
 * Side effects on global state: sblock.fs_dsize and sblock.fs_cstotal
 * are increased by this group's contribution, and the group's summary
 * is stored in fscs[cylno].
 */
static void
initcg(int cylno)
{
	diskaddr_t cbase, d;
	diskaddr_t dlower;	/* last data block before cg metadata */
	diskaddr_t dupper;	/* first data block after cg metadata */
	diskaddr_t dmax;
	int64_t i;
	struct csum *cs;
	struct dinode *inode_buffer;
	int size;

	/*
	 * Variables used to store intermediate results as a part of
	 * the internal implementation of the cbtocylno() macros.
	 */
	diskaddr_t bno;		/* UFS block number (not sector number) */
	int	cbcylno;	/* current cylinder number */
	int	cbcylno_sect;	/* sector offset within cylinder */
	int	cbsect_incr;	/* amount to increment sector offset */

	/*
	 * Variables used to store intermediate results as a part of
	 * the internal implementation of the cbtorpos() macros.
	 */
	short	*cgblks;	/* pointer to array of free blocks in cg */
	int	trackrpos;	/* tmp variable for rotation position */
	int	trackoff;	/* offset within a track */
	int	trackoff_incr;	/* amount to increment trackoff */
	int	rpos;		/* rotation position of current block */
	int	rpos_incr;	/* amount to increment rpos per block */

	union cgun *icgun;	/* local pointer to a cg summary block */
#define	icg	(icgun->cg)

	icgun = (union cgun *)getbuf(&cgsumbuf, sizeof (union cgun));

	/*
	 * Determine block bounds for cylinder group.
	 * Allow space for super block summary information in first
	 * cylinder group.
	 *
	 * cbase and dmax are file system frag addresses; dlower and
	 * dupper are offsets relative to cbase.
	 */
	cbase = cgbase(&sblock, cylno);
	dmax = cbase + sblock.fs_fpg;
	if (dmax > sblock.fs_size)	/* last cg may be smaller than normal */
		dmax = sblock.fs_size;
	dlower = cgsblock(&sblock, cylno) - cbase;
	dupper = cgdmin(&sblock, cylno) - cbase;
	if (cylno == 0)
		dupper += howmany(sblock.fs_cssize, sblock.fs_fsize);
	cs = fscs + cylno;
	icg.cg_time = mkfstime;
	icg.cg_magic = CG_MAGIC;
	icg.cg_cgx = cylno;
	/*
	 * NOTE(review): for the last group this evaluates to 0 when
	 * fs_ncyl is an exact multiple of fs_cpg -- confirm that the
	 * consumers of cg_ncyl expect that.
	 */
	if (cylno == sblock.fs_ncg - 1)
		icg.cg_ncyl = sblock.fs_ncyl % sblock.fs_cpg;
	else
		icg.cg_ncyl = sblock.fs_cpg;
	icg.cg_niblk = sblock.fs_ipg;
	icg.cg_ndblk = dmax - cbase;
	icg.cg_cs.cs_ndir = 0;
	icg.cg_cs.cs_nffree = 0;
	icg.cg_cs.cs_nbfree = 0;
	icg.cg_cs.cs_nifree = 0;
	icg.cg_rotor = 0;
	icg.cg_frotor = 0;
	icg.cg_irotor = 0;
	/*
	 * Lay out the variable-sized tables that follow the fixed part
	 * of the cg block.  Each offset is measured from the start of
	 * the cg structure (the address of cg_link).
	 *
	 * NOTE(review): the size of the per-cylinder totals table is
	 * computed with sizeof (long), so the layout depends on the data
	 * model mkfs is built for -- confirm against the on-disk format.
	 */
	icg.cg_btotoff = &icg.cg_space[0] - (uchar_t *)(&icg.cg_link);
	icg.cg_boff = icg.cg_btotoff + sblock.fs_cpg * sizeof (long);
	icg.cg_iusedoff = icg.cg_boff +
		sblock.fs_cpg * sblock.fs_nrpos * sizeof (short);
	icg.cg_freeoff = icg.cg_iusedoff + howmany(sblock.fs_ipg, NBBY);
	icg.cg_nextfreeoff = icg.cg_freeoff +
		howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY);
	for (i = 0; i < sblock.fs_frag; i++) {
		icg.cg_frsum[i] = 0;
	}
	/* Every inode starts out free ... */
	bzero((caddr_t)cg_inosused(&icg), icg.cg_freeoff - icg.cg_iusedoff);
	icg.cg_cs.cs_nifree += sblock.fs_ipg;
	/* ... except the inodes numbered below UFSROOTINO in group 0. */
	if (cylno == 0)
		for (i = 0; i < UFSROOTINO; i++) {
			setbit(cg_inosused(&icg), i);
			icg.cg_cs.cs_nifree--;
		}

	/*
	 * Initialize all the inodes in the cylinder group using
	 * random numbers.
	 */
	size = sblock.fs_ipg * sizeof (struct dinode);
	inode_buffer = (struct dinode *)getbuf(&inodebuf, size);

	for (i = 0; i < sblock.fs_ipg; i++) {
		IRANDOMIZE(&(inode_buffer[i].di_ic));
	}

	/*
	 * Write all inodes in a single write for performance.
	 */
	awtfs(fsbtodb(&sblock, (uint64_t)cgimin(&sblock, cylno)), (int)size,
	    (char *)inode_buffer, RELEASE);

	/* Clear the per-cylinder totals, rotational tables and free map. */
	bzero((caddr_t)cg_blktot(&icg), icg.cg_boff - icg.cg_btotoff);
	bzero((caddr_t)cg_blks(&sblock, &icg, 0),
	    icg.cg_iusedoff - icg.cg_boff);
	bzero((caddr_t)cg_blksfree(&icg), icg.cg_nextfreeoff - icg.cg_freeoff);

	/*
	 * In every group but the first, the blocks in front of the
	 * group's super block copy (offsets 0 up to dlower) are marked
	 * free and counted as data space.
	 */
	if (cylno > 0) {
		for (d = 0; d < dlower; d += sblock.fs_frag) {
			setblock(&sblock, cg_blksfree(&icg), d/sblock.fs_frag);
			icg.cg_cs.cs_nbfree++;
			cg_blktot(&icg)[cbtocylno(&sblock, d)]++;
			cg_blks(&sblock, &icg, cbtocylno(&sblock, d))
			    [cbtorpos(&sblock, d)]++;
		}
		sblock.fs_dsize += dlower;
	}
	sblock.fs_dsize += icg.cg_ndblk - dupper;
	/*
	 * If dupper is not on a block boundary, the frags from dupper
	 * up to the next boundary are marked free individually and
	 * recorded in cg_frsum as one free run; dupper is advanced to
	 * that boundary.
	 */
	if ((i = dupper % sblock.fs_frag) != 0) {
		icg.cg_frsum[sblock.fs_frag - i]++;
		for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) {
			setbit(cg_blksfree(&icg), dupper);
			icg.cg_cs.cs_nffree++;
		}
	}

	/*
	 * WARNING: The following code is somewhat confusing, but
	 * results in a substantial performance improvement in mkfs.
	 *
	 * Instead of using cbtocylno() and cbtorpos() macros, we
	 * keep track of all the intermediate state of those macros
	 * in some variables.  This allows simple addition to be
	 * done to calculate the results as we step through the
	 * blocks in an orderly fashion instead of the slower
	 * multiplication and division the macros are forced to
	 * use so they can support random input.  (Multiplication,
	 * division, and remainder operations typically take about
	 * 10x as many processor cycles as other operations.)
	 *
	 * The basic idea is to take code:
	 *
	 *	for (x = starting_x; x < max; x++)
	 *		y = (x * c) / z
	 *
	 * and rewrite it to take advantage of the fact that
	 * the variable x is incrementing in an orderly way:
	 *
	 *	intermediate = starting_x * c
	 *	yval = intermediate / z
	 *	intermediate = intermediate % z
	 *	for (x = starting_x; x < max; x++) {
	 *		y = yval;
	 *		intermediate += c
	 *		if (intermediate >= z) {
	 *			yval++;
	 *			intermediate -= z
	 *		}
	 *	}
	 *
	 * Performance has improved as much as 4X using this code.
	 */

	/*
	 * Initialize the starting points for all the cbtocylno()
	 * macro variables and figure out the increments needed each
	 * time through the loop.
	 */
	cbcylno_sect = dupper * NSPF(&sblock);
	cbsect_incr = sblock.fs_frag * NSPF(&sblock);
	cbcylno = cbcylno_sect / sblock.fs_spc;
	cbcylno_sect %= sblock.fs_spc;
	cgblks = cg_blks(&sblock, &icg, cbcylno);
	bno = dupper / sblock.fs_frag;

	/*
	 * Initialize the starting points for all the cbtorpos()
	 * macro variables and figure out the increments needed each
	 * time through the loop.
	 *
	 * It's harder to simplify the cbtorpos() macro if there were
	 * alternate sectors specified (or if they previously existed
	 * in the growfs case).  Since this is rare, we just revert to
	 * using the macros in this case and skip the variable setup.
	 */
	if (!spc_flag) {
		trackrpos = (cbcylno_sect % sblock.fs_nsect) * sblock.fs_nrpos;
		rpos = trackrpos / sblock.fs_nsect;
		trackoff = trackrpos % sblock.fs_nsect;
		trackoff_incr = cbsect_incr * sblock.fs_nrpos;
		rpos_incr = (trackoff_incr / sblock.fs_nsect) % sblock.fs_nrpos;
		trackoff_incr = trackoff_incr % sblock.fs_nsect;
	}

	/*
	 * Loop through all the blocks, marking them free and
	 * updating totals kept in the superblock and cg summary.
	 */
	for (d = dupper; d + sblock.fs_frag <= dmax - cbase; ) {
		setblock(&sblock, cg_blksfree(&icg),  bno);
		icg.cg_cs.cs_nbfree++;

		cg_blktot(&icg)[cbcylno]++;

		if (!spc_flag)
			cgblks[rpos]++;
		else
			cg_blks(&sblock, &icg, cbtocylno(&sblock, d))
			    [cbtorpos(&sblock, d)]++;

		d += sblock.fs_frag;
		bno++;

		/*
		 * Increment the sector offset within the cylinder
		 * for the cbtocylno() macro reimplementation.  If
		 * we're beyond the end of the cylinder, update the
		 * cylinder number, calculate the offset in the
		 * new cylinder, and update the cgblks pointer
		 * to the next rotational position.
		 */
		cbcylno_sect += cbsect_incr;
		if (cbcylno_sect >= sblock.fs_spc) {
			cbcylno++;
			cbcylno_sect -= sblock.fs_spc;
			cgblks += sblock.fs_nrpos;
		}

		/*
		 * If there aren't alternate sectors, increment the
		 * rotational position variables for the cbtorpos()
		 * reimplementation.  Note that we potentially
		 * increment rpos twice.  Once by rpos_incr, and one
		 * more time when we wrap to a new track because
		 * trackoff >= fs_nsect.
		 */
		if (!spc_flag) {
			trackoff += trackoff_incr;
			rpos += rpos_incr;
			if (trackoff >= sblock.fs_nsect) {
				trackoff -= sblock.fs_nsect;
				rpos++;
			}
			if (rpos >= sblock.fs_nrpos)
				rpos -= sblock.fs_nrpos;
		}
	}

	/*
	 * Whatever is left at the end of the group is smaller than a
	 * full block; mark those frags free one at a time and record
	 * the run in cg_frsum.
	 */
	if (d < dmax - cbase) {
		icg.cg_frsum[dmax - cbase - d]++;
		for (; d < dmax - cbase; d++) {
			setbit(cg_blksfree(&icg), d);
			icg.cg_cs.cs_nffree++;
		}
	}
	/* Fold this group's counts into the file system wide totals. */
	sblock.fs_cstotal.cs_ndir += icg.cg_cs.cs_ndir;
	sblock.fs_cstotal.cs_nffree += icg.cg_cs.cs_nffree;
	sblock.fs_cstotal.cs_nbfree += icg.cg_cs.cs_nbfree;
	sblock.fs_cstotal.cs_nifree += icg.cg_cs.cs_nifree;
	*cs = icg.cg_cs;
	/* Queue the finished cg block; the buffer is released on completion. */
	awtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, cylno)),
		sblock.fs_bsize, (char *)&icg, RELEASE);
}
2367 
/*
 * Data used by fsinit() to initialize the file system.
 *
 * 'node' is the in-core inode that fsinit() fills in and hands to
 * iput(), first for lost+found (when LOSTDIR is defined) and then
 * for the root directory.
 */
struct inode node;

/*
 * LOSTDIR selects whether a lost+found directory is created.
 * PREDEFDIR is the number of entries in root_dir; fsinit() also
 * uses it as the root directory's link count.
 */
#define	LOSTDIR
#ifdef LOSTDIR
#define	PREDEFDIR 3
#else
#define	PREDEFDIR 2
#endif

/*
 * Prototype entries for the root directory.  The record lengths
 * given here are placeholders: makedir() recomputes d_reclen for
 * every entry before copying it into 'buf'.
 */
struct direct root_dir[] = {
	{ UFSROOTINO, sizeof (struct direct), 1, "." },
	{ UFSROOTINO, sizeof (struct direct), 2, ".." },
#ifdef LOSTDIR
	{ LOSTFOUNDINO, sizeof (struct direct), 10, "lost+found" },
#endif
};
#ifdef LOSTDIR
/*
 * Prototype entries for lost+found.  Only the first two entries are
 * passed to makedir(); the third is an empty entry (inode 0) whose
 * record length is DIRBLKSIZ, which fsinit() copies to the start of
 * each remaining DIRBLKSIZ chunk of the directory block.
 */
struct direct lost_found_dir[] = {
	{ LOSTFOUNDINO, sizeof (struct direct), 1, "." },
	{ UFSROOTINO, sizeof (struct direct), 2, ".." },
	{ 0, DIRBLKSIZ, 0, 0 },
};
#endif
/* Scratch block in which makedir() and fsinit() build directory data. */
char buf[MAXBSIZE];
2395 
2396 static void
2397 fsinit()
2398 {
2399 	int i;
2400 
2401 
2402 	/*
2403 	 * initialize the node
2404 	 */
2405 	node.i_atime = mkfstime;
2406 	node.i_mtime = mkfstime;
2407 	node.i_ctime = mkfstime;
2408 #ifdef LOSTDIR
2409 	/*
2410 	 * create the lost+found directory
2411 	 */
2412 	(void) makedir(lost_found_dir, 2);
2413 	for (i = DIRBLKSIZ; i < sblock.fs_bsize; i += DIRBLKSIZ) {
2414 		bcopy(&lost_found_dir[2], &buf[i], DIRSIZ(&lost_found_dir[2]));
2415 	}
2416 	node.i_number = LOSTFOUNDINO;
2417 	node.i_smode = node.i_mode = IFDIR | 0700;
2418 	node.i_nlink = 2;
2419 	node.i_size = sblock.fs_bsize;
2420 	node.i_db[0] = alloc((int)node.i_size, node.i_mode);
2421 	node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size));
2422 	IRANDOMIZE(&node.i_ic);
2423 	wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), (int)node.i_size, buf);
2424 	iput(&node);
2425 #endif
2426 	/*
2427 	 * create the root directory
2428 	 */
2429 	node.i_number = UFSROOTINO;
2430 	node.i_mode = node.i_smode = IFDIR | UMASK;
2431 	node.i_nlink = PREDEFDIR;
2432 	node.i_size = makedir(root_dir, PREDEFDIR);
2433 	node.i_db[0] = alloc(sblock.fs_fsize, node.i_mode);
2434 	/* i_size < 2GB because we are initializing the file system */
2435 	node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size));
2436 	IRANDOMIZE(&node.i_ic);
2437 	wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), sblock.fs_fsize, buf);
2438 	iput(&node);
2439 }
2440 
2441 /*
2442  * construct a set of directory entries in "buf".
2443  * return size of directory.
2444  */
2445 static int
2446 makedir(struct direct *protodir, int entries)
2447 {
2448 	char *cp;
2449 	int i;
2450 	ushort_t spcleft;
2451 
2452 	spcleft = DIRBLKSIZ;
2453 	for (cp = buf, i = 0; i < entries - 1; i++) {
2454 		protodir[i].d_reclen = DIRSIZ(&protodir[i]);
2455 		bcopy(&protodir[i], cp, protodir[i].d_reclen);
2456 		cp += protodir[i].d_reclen;
2457 		spcleft -= protodir[i].d_reclen;
2458 	}
2459 	protodir[i].d_reclen = spcleft;
2460 	bcopy(&protodir[i], cp, DIRSIZ(&protodir[i]));
2461 	return (DIRBLKSIZ);
2462 }
2463 
2464 /*
2465  * allocate a block or frag
2466  */
2467 static daddr32_t
2468 alloc(int size, int mode)
2469 {
2470 	int i, frag;
2471 	daddr32_t d;
2472 
2473 	rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2474 	    (char *)&acg);
2475 	if (acg.cg_magic != CG_MAGIC) {
2476 		(void) fprintf(stderr, gettext("cg 0: bad magic number\n"));
2477 		lockexit(32);
2478 	}
2479 	if (acg.cg_cs.cs_nbfree == 0) {
2480 		(void) fprintf(stderr,
2481 			gettext("first cylinder group ran out of space\n"));
2482 		lockexit(32);
2483 	}
2484 	for (d = 0; d < acg.cg_ndblk; d += sblock.fs_frag)
2485 		if (isblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag))
2486 			goto goth;
2487 	(void) fprintf(stderr,
2488 	    gettext("internal error: can't find block in cyl 0\n"));
2489 	lockexit(32);
2490 goth:
2491 	clrblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag);
2492 	acg.cg_cs.cs_nbfree--;
2493 	sblock.fs_cstotal.cs_nbfree--;
2494 	fscs[0].cs_nbfree--;
2495 	if (mode & IFDIR) {
2496 		acg.cg_cs.cs_ndir++;
2497 		sblock.fs_cstotal.cs_ndir++;
2498 		fscs[0].cs_ndir++;
2499 	}
2500 	cg_blktot(&acg)[cbtocylno(&sblock, d)]--;
2501 	cg_blks(&sblock, &acg, cbtocylno(&sblock, d))[cbtorpos(&sblock, d)]--;
2502 	if (size != sblock.fs_bsize) {
2503 		frag = howmany(size, sblock.fs_fsize);
2504 		fscs[0].cs_nffree += sblock.fs_frag - frag;
2505 		sblock.fs_cstotal.cs_nffree += sblock.fs_frag - frag;
2506 		acg.cg_cs.cs_nffree += sblock.fs_frag - frag;
2507 		acg.cg_frsum[sblock.fs_frag - frag]++;
2508 		for (i = frag; i < sblock.fs_frag; i++)
2509 			setbit(cg_blksfree(&acg), d + i);
2510 	}
2511 	wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2512 	    (char *)&acg);
2513 	return (d);
2514 }
2515 
2516 /*
2517  * Allocate an inode on the disk
2518  */
2519 static void
2520 iput(struct inode *ip)
2521 {
2522 	struct dinode buf[MAXINOPB];
2523 	diskaddr_t d;
2524 
2525 	rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2526 	    (char *)&acg);
2527 	if (acg.cg_magic != CG_MAGIC) {
2528 		(void) fprintf(stderr, gettext("cg 0: bad magic number\n"));
2529 		lockexit(32);
2530 	}
2531 	acg.cg_cs.cs_nifree--;
2532 	setbit(cg_inosused(&acg), ip->i_number);
2533 	wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize,
2534 	    (char *)&acg);
2535 	sblock.fs_cstotal.cs_nifree--;
2536 	fscs[0].cs_nifree--;
2537 	if ((int)ip->i_number >= sblock.fs_ipg * sblock.fs_ncg) {
2538 		(void) fprintf(stderr,
2539 			gettext("fsinit: inode value out of range (%d).\n"),
2540 			ip->i_number);
2541 		lockexit(32);
2542 	}
2543 	d = fsbtodb(&sblock, (uint64_t)itod(&sblock, (int)ip->i_number));
2544 	rdfs(d, sblock.fs_bsize, (char *)buf);
2545 	buf[itoo(&sblock, (int)ip->i_number)].di_ic = ip->i_ic;
2546 	wtfs(d, sblock.fs_bsize, (char *)buf);
2547 }
2548 
2549 /*
2550  * getbuf()	-- Get a buffer for use in an AIO operation.  Buffer
2551  *		is zero'd the first time returned, left with whatever
2552  *		was in memory after that.  This function actually gets
2553  *		enough memory the first time it's called to support
2554  *		MAXBUF buffers like a slab allocator.  When all the
2555  *		buffers are in use, it waits for an aio to complete
2556  *		and make a buffer available.
2557  *
2558  *		Never returns an error.  Either succeeds or exits.
2559  */
2560 static char *
2561 getbuf(bufhdr *bufhead, int size)
2562 {
2563 	bufhdr *pbuf;
2564 	bufhdr *prev;
2565 	int i;
2566 	int buf_size, max_bufs;
2567 
2568 	/*
2569 	 * Initialize all the buffers
2570 	 */
2571 	if (bufhead->head == NULL) {
2572 		/*
2573 		 * round up the size of our buffer header to a
2574 		 * 16 byte boundary so the address we return to
2575 		 * the caller is "suitably aligned".
2576 		 */
2577 		bufhdrsize = (sizeof (bufhdr) + 15) & ~15;
2578 
2579 		/*
2580 		 * Add in our header to the buffer and round it all up to
2581 		 * a 16 byte boundry so each member of the slab is aligned.
2582 		 */
2583 		buf_size = (size + bufhdrsize + 15) & ~15;
2584 
2585 		/*
2586 		 * Limit number of buffers to lesser of MAXBUFMEM's worth
2587 		 * or MAXBUF, whichever is less.
2588 		 */
2589 		max_bufs = MAXBUFMEM / buf_size;
2590 		if (max_bufs > MAXBUF)
2591 			max_bufs = MAXBUF;
2592 
2593 		pbuf = (bufhdr *)calloc(max_bufs, buf_size);
2594 		if (pbuf == NULL) {
2595 			perror("calloc");
2596 			lockexit(32);
2597 		}
2598 
2599 		bufhead->head = bufhead;
2600 		prev = bufhead;
2601 		for (i = 0; i < max_bufs; i++) {
2602 			pbuf->head = bufhead;
2603 			prev->next = pbuf;
2604 			prev = pbuf;
2605 			pbuf = (bufhdr *)((char *)pbuf + buf_size);
2606 		}
2607 	}
2608 
2609 	/*
2610 	 * Get an available buffer, waiting for I/O if necessary
2611 	 */
2612 	wait_for_write(NOBLOCK);
2613 	while (bufhead->next == NULL)
2614 		wait_for_write(BLOCK);
2615 
2616 	/*
2617 	 * Take the buffer off the list
2618 	 */
2619 	pbuf = bufhead->next;
2620 	bufhead->next = pbuf->next;
2621 	pbuf->next = NULL;
2622 
2623 	/*
2624 	 * return the empty buffer space just past the header
2625 	 */
2626 	return ((char *)pbuf + bufhdrsize);
2627 }
2628 
2629 /*
2630  * freebuf()	-- Free a buffer gotten previously through getbuf.
2631  *		Puts the buffer back on the appropriate list for
2632  *		later use.  Never calls free().
2633  *
2634  * Assumes that SIGINT is blocked.
2635  */
2636 static void
2637 freebuf(char *buf)
2638 {
2639 	bufhdr *pbuf;
2640 	bufhdr *bufhead;
2641 
2642 	/*
2643 	 * get the header for this buffer
2644 	 */
2645 	pbuf = (bufhdr *)(buf - bufhdrsize);
2646 
2647 	/*
2648 	 * Put it back on the list of available buffers
2649 	 */
2650 	bufhead = pbuf->head;
2651 	pbuf->next = bufhead->next;
2652 	bufhead->next = pbuf;
2653 }
2654 
2655 /*
2656  * freetrans()	-- Free a transaction gotten previously through getaiop.
2657  *		Puts the transaction struct back on the appropriate list for
2658  *		later use.  Never calls free().
2659  *
2660  * Assumes that SIGINT is blocked.
2661  */
2662 static void
2663 freetrans(aio_trans *transp)
2664 {
2665 	/*
2666 	 * free the buffer associated with this AIO if needed
2667 	 */
2668 	if (transp->release == RELEASE)
2669 		freebuf(transp->buffer);
2670 
2671 	/*
2672 	 * Put transaction on the free list
2673 	 */
2674 	transp->next = results.trans;
2675 	results.trans = transp;
2676 }
2677 
2678 /*
2679  * wait_for_write()	-- Wait for an aio write to complete.  Return
2680  *			the transaction structure for that write.
2681  *
2682  * Blocks SIGINT if necessary.
2683  */
2684 aio_trans *
2685 wait_for_write(int block)
2686 {
2687 	aio_trans	*transp;
2688 	aio_result_t	*resultp;
2689 	static struct timeval  zero_wait = { 0, 0 };
2690 	sigset_t	old_mask;
2691 
2692 	/*
2693 	 * If we know there aren't any outstanding transactions, just return
2694 	 */
2695 	if (results.outstanding == 0)
2696 		return ((aio_trans *) 0);
2697 
2698 	block_sigint(&old_mask);
2699 
2700 	resultp = aiowait(block ? NULL : &zero_wait);
2701 	if (resultp == NULL ||
2702 	    (resultp == (aio_result_t *)-1 && errno == EINVAL)) {
2703 		unblock_sigint(&old_mask);
2704 		return ((aio_trans *) 0);
2705 	}
2706 
2707 	results.outstanding--;
2708 	transp = (aio_trans *)resultp;
2709 
2710 	if (resultp->aio_return != transp->size) {
2711 		if (resultp->aio_return == -1) {
2712 			/*
2713 			 * The aiowrite() may have failed because the
2714 			 * kernel didn't have enough memory to do the job.
2715 			 * Flush all pending writes and try a normal
2716 			 * write().  wtfs_breakup() will call exit if it
2717 			 * fails, so we don't worry about errors here.
2718 			 */
2719 			flush_writes();
2720 			wtfs_breakup(transp->bno, transp->size, transp->buffer);
2721 		} else {
2722 			(void) fprintf(stderr, gettext(
2723 			    "short write (%d of %d bytes) on sector %lld\n"),
2724 			    resultp->aio_return, transp->size,
2725 			    transp->bno);
2726 			/*
2727 			 * Don't unblock SIGINT, to avoid potential
2728 			 * looping due to queued interrupts and
2729 			 * error handling.
2730 			 */
2731 			lockexit(32);
2732 		}
2733 	}
2734 
2735 	resultp->aio_return = 0;
2736 	freetrans(transp);
2737 	unblock_sigint(&old_mask);
2738 	return (transp);
2739 }
2740 
2741 /*
2742  * flush_writes()	-- flush all the outstanding aio writes.
2743  */
2744 static void
2745 flush_writes(void)
2746 {
2747 	while (wait_for_write(BLOCK))
2748 		;
2749 }
2750 
2751 /*
2752  * get_aiop()	-- find and return an aio_trans structure on which a new
2753  *		aio can be done.  Blocks on aiowait() if needed.  Reaps
2754  *		all outstanding completed aio's.
2755  *
2756  * Assumes that SIGINT is blocked.
2757  */
2758 aio_trans *
2759 get_aiop()
2760 {
2761 	int i;
2762 	aio_trans *transp;
2763 	aio_trans *prev;
2764 
2765 	/*
2766 	 * initialize aio stuff
2767 	 */
2768 	if (!aio_inited) {
2769 		aio_inited = 1;
2770 
2771 		results.maxpend = 0;
2772 		results.outstanding = 0;
2773 		results.max = MAXAIO;
2774 
2775 		results.trans = (aio_trans *)calloc(results.max,
2776 						sizeof (aio_trans));
2777 		if (results.trans == NULL) {
2778 			perror("calloc");
2779 			lockexit(32);
2780 		}
2781 
2782 		/*
2783 		 * Initialize the linked list of aio transaction
2784 		 * structures.  Note that the final "next" pointer
2785 		 * will be NULL since we got the buffer from calloc().
2786 		 */
2787 		prev = results.trans;
2788 		for (i = 1; i < results.max; i++) {
2789 			prev->next = &(results.trans[i]);
2790 			prev = prev->next;
2791 		}
2792 	}
2793 
2794 	wait_for_write(NOBLOCK);
2795 	while (results.trans == NULL)
2796 		wait_for_write(BLOCK);
2797 	transp = results.trans;
2798 	results.trans = results.trans->next;
2799 
2800 	transp->next = 0;
2801 	transp->resultbuf.aio_return = AIO_INPROGRESS;
2802 	return (transp);
2803 }
2804 
2805 /*
2806  * read a block from the file system
2807  */
2808 static void
2809 rdfs(diskaddr_t bno, int size, char *bf)
2810 {
2811 	int n, saverr;
2812 
2813 	/*
2814 	 * In case we need any data that's pending in an aiowrite(),
2815 	 * we wait for them all to complete before doing a read.
2816 	 */
2817 	flush_writes();
2818 
2819 	/*
2820 	 * Note: the llseek() can succeed, even if the offset is out of range.
2821 	 * It's not until the file i/o operation (the read()) that one knows
2822 	 * for sure if the raw device can handle the offset.
2823 	 */
2824 	if (llseek(fsi, (offset_t)bno * sectorsize, 0) < 0) {
2825 		saverr = errno;
2826 		(void) fprintf(stderr,
2827 		    gettext("seek error on sector %lld: %s\n"),
2828 		    bno, strerror(saverr));
2829 		lockexit(32);
2830 	}
2831 	n = read(fsi, bf, size);
2832 	if (n != size) {
2833 		saverr = errno;
2834 		if (n == -1)
2835 			(void) fprintf(stderr,
2836 			    gettext("read error on sector %lld: %s\n"),
2837 			    bno, strerror(saverr));
2838 		else
2839 			(void) fprintf(stderr, gettext(
2840 			    "short read (%d of %d bytes) on sector %lld\n"),
2841 			    n, size, bno);
2842 		lockexit(32);
2843 	}
2844 }
2845 
2846 /*
2847  * write a block to the file system
2848  */
2849 static void
2850 wtfs(diskaddr_t bno, int size, char *bf)
2851 {
2852 	int n, saverr;
2853 
2854 	if (fso == -1)
2855 		return;
2856 
2857 	/*
2858 	 * Note: the llseek() can succeed, even if the offset is out of range.
2859 	 * It's not until the file i/o operation (the write()) that one knows
2860 	 * for sure if the raw device can handle the offset.
2861 	 */
2862 	if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) {
2863 		saverr = errno;
2864 		(void) fprintf(stderr,
2865 		    gettext("seek error on sector %lld: %s\n"),
2866 		    bno, strerror(saverr));
2867 		lockexit(32);
2868 	}
2869 	if (Nflag)
2870 		return;
2871 	n = write(fso, bf, size);
2872 	if (n != size) {
2873 		saverr = errno;
2874 		if (n == -1)
2875 			(void) fprintf(stderr,
2876 			    gettext("write error on sector %lld: %s\n"),
2877 			    bno, strerror(saverr));
2878 		else
2879 			(void) fprintf(stderr, gettext(
2880 			    "short write (%d of %d bytes) on sector %lld\n"),
2881 			    n, size, bno);
2882 		lockexit(32);
2883 	}
2884 }
2885 
2886 /*
2887  * write a block to the file system -- buffered with aio
2888  */
2889 static void
2890 awtfs(diskaddr_t bno, int size, char *bf, int release)
2891 {
2892 	int n;
2893 	aio_trans 	*transp;
2894 	sigset_t 	old_mask;
2895 
2896 	if (fso == -1)
2897 		return;
2898 
2899 	/*
2900 	 * We need to keep things consistent if we get interrupted,
2901 	 * so defer any expected interrupts for the time being.
2902 	 */
2903 	block_sigint(&old_mask);
2904 
2905 	if (Nflag) {
2906 		if (release == RELEASE)
2907 			freebuf(bf);
2908 	} else {
2909 		transp = get_aiop();
2910 		transp->bno = bno;
2911 		transp->buffer = bf;
2912 		transp->size = size;
2913 		transp->release = release;
2914 
2915 		n = aiowrite(fso, bf, size, (off_t)bno * sectorsize,
2916 				SEEK_SET, &transp->resultbuf);
2917 
2918 		if (n < 0) {
2919 			/*
2920 			 * The aiowrite() may have failed because the
2921 			 * kernel didn't have enough memory to do the job.
2922 			 * Flush all pending writes and try a normal
2923 			 * write().  wtfs_breakup() will call exit if it
2924 			 * fails, so we don't worry about errors here.
2925 			 */
2926 			flush_writes();
2927 			wtfs_breakup(transp->bno, transp->size, transp->buffer);
2928 			freetrans(transp);
2929 		} else {
2930 			/*
2931 			 * Keep track of our pending writes.
2932 			 */
2933 			results.outstanding++;
2934 			if (results.outstanding > results.maxpend)
2935 			    results.maxpend = results.outstanding;
2936 		}
2937 	}
2938 
2939 	unblock_sigint(&old_mask);
2940 }
2941 
2942 
2943 /*
2944  * write a block to the file system, but break it up into sbsize
2945  * chunks to avoid forcing a large amount of memory to be locked down.
2946  * Only used as a fallback when an aio write has failed.
2947  */
2948 static void
2949 wtfs_breakup(diskaddr_t bno, int size, char *bf)
2950 {
2951 	int n, saverr;
2952 	int wsize;
2953 	int block_incr = sbsize / sectorsize;
2954 
2955 	if (size < sbsize)
2956 		wsize = size;
2957 	else
2958 		wsize = sbsize;
2959 
2960 	n = 0;
2961 	while (size) {
2962 		/*
2963 		 * Note: the llseek() can succeed, even if the offset is
2964 		 * out of range.  It's not until the file i/o operation
2965 		 * (the write()) that one knows for sure if the raw device
2966 		 * can handle the offset.
2967 		 */
2968 		if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) {
2969 			saverr = errno;
2970 			(void) fprintf(stderr,
2971 			    gettext("seek error on sector %lld: %s\n"),
2972 			    bno, strerror(saverr));
2973 			lockexit(32);
2974 		}
2975 
2976 		n = write(fso, bf, wsize);
2977 		if (n == -1) {
2978 			saverr = errno;
2979 			(void) fprintf(stderr,
2980 			    gettext("write error on sector %lld: %s\n"),
2981 			    bno, strerror(saverr));
2982 			lockexit(32);
2983 		}
2984 		if (n != wsize) {
2985 			saverr = errno;
2986 			(void) fprintf(stderr, gettext(
2987 			    "short write (%d of %d bytes) on sector %lld\n"),
2988 			    n, size, bno);
2989 			lockexit(32);
2990 		}
2991 
2992 		bno += block_incr;
2993 		bf += wsize;
2994 		size -= wsize;
2995 		if (size < wsize)
2996 			wsize = size;
2997 	}
2998 }
2999 
3000 
3001 /*
3002  * check if a block is available
3003  */
3004 static int
3005 isblock(struct fs *fs, unsigned char *cp, int h)
3006 {
3007 	unsigned char mask;
3008 
3009 	switch (fs->fs_frag) {
3010 	case 8:
3011 		return (cp[h] == 0xff);
3012 	case 4:
3013 		mask = 0x0f << ((h & 0x1) << 2);
3014 		return ((cp[h >> 1] & mask) == mask);
3015 	case 2:
3016 		mask = 0x03 << ((h & 0x3) << 1);
3017 		return ((cp[h >> 2] & mask) == mask);
3018 	case 1:
3019 		mask = 0x01 << (h & 0x7);
3020 		return ((cp[h >> 3] & mask) == mask);
3021 	default:
3022 		(void) fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag);
3023 		return (0);
3024 	}
3025 }
3026 
3027 /*
3028  * take a block out of the map
3029  */
3030 static void
3031 clrblock(struct fs *fs, unsigned char *cp, int h)
3032 {
3033 	switch ((fs)->fs_frag) {
3034 	case 8:
3035 		cp[h] = 0;
3036 		return;
3037 	case 4:
3038 		cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2));
3039 		return;
3040 	case 2:
3041 		cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1));
3042 		return;
3043 	case 1:
3044 		cp[h >> 3] &= ~(0x01 << (h & 0x7));
3045 		return;
3046 	default:
3047 		(void) fprintf(stderr,
3048 		    gettext("clrblock: bad fs_frag value %d\n"), fs->fs_frag);
3049 		return;
3050 	}
3051 }
3052 
3053 /*
3054  * put a block into the map
3055  */
3056 static void
3057 setblock(struct fs *fs, unsigned char *cp, int h)
3058 {
3059 	switch (fs->fs_frag) {
3060 	case 8:
3061 		cp[h] = 0xff;
3062 		return;
3063 	case 4:
3064 		cp[h >> 1] |= (0x0f << ((h & 0x1) << 2));
3065 		return;
3066 	case 2:
3067 		cp[h >> 2] |= (0x03 << ((h & 0x3) << 1));
3068 		return;
3069 	case 1:
3070 		cp[h >> 3] |= (0x01 << (h & 0x7));
3071 		return;
3072 	default:
3073 		(void) fprintf(stderr,
3074 		    gettext("setblock: bad fs_frag value %d\n"), fs->fs_frag);
3075 		return;
3076 	}
3077 }
3078 
3079 static void
3080 usage()
3081 {
3082 	(void) fprintf(stderr,
3083 	    gettext("ufs usage: mkfs [-F FSType] [-V] [-m] [-o options] "
3084 		"special "			/* param 0 */
3085 		"size(sectors) \\ \n"));	/* param 1 */
3086 	(void) fprintf(stderr,
3087 		"[nsect "			/* param 2 */
3088 		"ntrack "			/* param 3 */
3089 		"bsize "			/* param 4 */
3090 		"fragsize "			/* param 5 */
3091 		"cpg "				/* param 6 */
3092 		"free "				/* param 7 */
3093 		"rps "				/* param 8 */
3094 		"nbpi "				/* param 9 */
3095 		"opt "				/* param 10 */
3096 		"apc "				/* param 11 */
3097 		"gap "				/* param 12 */
3098 		"nrpos "			/* param 13 */
3099 		"maxcontig "			/* param 14 */
3100 		"mtb]\n");			/* param 15 */
3101 	(void) fprintf(stderr,
3102 		gettext(" -m : dump fs cmd line used to make this partition\n"
3103 		" -V :print this command line and return\n"
3104 		" -o :ufs options: :nsect=%d,ntrack=%d,bsize=%d,fragsize=%d\n"
3105 		" -o :ufs options: :cgsize=%d,free=%d,rps=%d,nbpi=%d,opt=%c\n"
3106 		" -o :ufs options: :apc=%d,gap=%d,nrpos=%d,maxcontig=%d\n"
3107 		" -o :ufs options: :mtb=%c\n"
3108 "NOTE that all -o suboptions: must be separated only by commas so as to\n"
3109 "be parsed as a single argument\n"),
3110 		nsect, ntrack, bsize, fragsize, cpg, sblock.fs_minfree, rps,
3111 		nbpi, opt, apc, (rotdelay == -1) ? 0 : rotdelay,
3112 		sblock.fs_nrpos, maxcontig, mtb);
3113 	lockexit(32);
3114 }
3115 
3116 /*ARGSUSED*/
3117 static void
3118 dump_fscmd(char *fsys, int fsi)
3119 {
3120 	int64_t used, bpcg, inospercg;
3121 	int64_t nbpi;
3122 	uint64_t nbytes64;
3123 
3124 	bzero((char *)&sblock, sizeof (sblock));
3125 	rdfs((diskaddr_t)SBLOCK, SBSIZE, (char *)&sblock);
3126 
3127 	/*
3128 	 * ensure a valid file system and if not, exit with error or else
3129 	 * we will end up computing block numbers etc and dividing by zero
3130 	 * which will cause floating point errors in this routine.
3131 	 */
3132 
3133 	if ((sblock.fs_magic != FS_MAGIC) &&
3134 	    (sblock.fs_magic != MTB_UFS_MAGIC)) {
3135 	    (void) fprintf(stderr, gettext(
3136 		"[not currently a valid file system - bad superblock]\n"));
3137 		lockexit(32);
3138 	}
3139 
3140 	if (sblock.fs_magic == MTB_UFS_MAGIC &&
3141 	    (sblock.fs_version > MTB_UFS_VERSION_1 ||
3142 	    sblock.fs_version < MTB_UFS_VERSION_MIN)) {
3143 	    (void) fprintf(stderr, gettext(
3144 		"Unknown version of UFS format: %d\n"), sblock.fs_version);
3145 		lockexit(32);
3146 	}
3147 
3148 	/*
3149 	 * Compute a reasonable nbpi value.
3150 	 * The algorithm for "used" is copied from code
3151 	 * in main() verbatim.
3152 	 * The nbpi equation is taken from main where the
3153 	 * fs_ipg value is set for the last time.  The INOPB(...) - 1
3154 	 * is used to account for the roundup.
3155 	 * The problem is that a range of nbpi values map to
3156 	 * the same file system layout.  So it is not possible
3157 	 * to calculate the exact value specified when the file
3158 	 * system was created.  So instead we determine the top
3159 	 * end of the range of values.
3160 	 */
3161 	bpcg = sblock.fs_spc * sectorsize;
3162 	inospercg = (int64_t)roundup(bpcg / sizeof (struct dinode),
3163 	    INOPB(&sblock));
3164 	if (inospercg > MAXIpG(&sblock))
3165 		inospercg = MAXIpG(&sblock);
3166 	used = (int64_t)
3167 	    (sblock.fs_iblkno + inospercg / INOPF(&sblock)) * NSPF(&sblock);
3168 	used *= sectorsize;
3169 	nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used;
3170 
3171 	/*
3172 	 * The top end of the range of values for nbpi may not be
3173 	 * a valid command line value for mkfs. Report the bottom
3174 	 * end instead.
3175 	 */
3176 	nbpi = (int64_t)(nbytes64 / (sblock.fs_ipg));
3177 
3178 	(void) fprintf(stdout, gettext("mkfs -F ufs -o "), fsys);
3179 	(void) fprintf(stdout, "nsect=%d,ntrack=%d,",
3180 	    sblock.fs_nsect, sblock.fs_ntrak);
3181 	(void) fprintf(stdout, "bsize=%d,fragsize=%d,cgsize=%d,free=%d,",
3182 	    sblock.fs_bsize, sblock.fs_fsize, sblock.fs_cpg, sblock.fs_minfree);
3183 	(void) fprintf(stdout, "rps=%d,nbpi=%lld,opt=%c,apc=%d,gap=%d,",
3184 	    sblock.fs_rps, nbpi, (sblock.fs_optim == FS_OPTSPACE) ? 's' : 't',
3185 	    (sblock.fs_ntrak * sblock.fs_nsect) - sblock.fs_spc,
3186 	    sblock.fs_rotdelay);
3187 	(void) fprintf(stdout, "nrpos=%d,maxcontig=%d,mtb=%c ",
3188 	    sblock.fs_nrpos, sblock.fs_maxcontig,
3189 	    ((sblock.fs_magic == MTB_UFS_MAGIC) ? 'y' : 'n'));
3190 	(void) fprintf(stdout, "%s %lld\n", fsys,
3191 	    fsbtodb(&sblock, sblock.fs_size));
3192 
3193 	bzero((char *)&sblock, sizeof (sblock));
3194 }
3195 
3196 /* number ************************************************************* */
3197 /*									*/
3198 /* Convert a numeric string arg to binary				*/
3199 /*									*/
3200 /* Args:	d_value - default value, if have parse error		*/
3201 /*		param - the name of the argument, for error messages	*/
3202 /*		flags - parser state and what's allowed in the arg	*/
3203 /* Global arg:  string - pointer to command arg				*/
3204 /*									*/
3205 /* Valid forms: 123 | 123k | 123*123 | 123x123				*/
3206 /*									*/
3207 /* Return:	converted number					*/
3208 /*									*/
3209 /* ******************************************************************** */
3210 
3211 static uint64_t
3212 number(uint64_t d_value, char *param, int flags)
3213 {
3214 	char *cs;
3215 	uint64_t n, t;
3216 	uint64_t cut = BIG / 10;    /* limit to avoid overflow */
3217 	int minus = 0;
3218 
3219 	cs = string;
3220 	if (*cs == '-') {
3221 		minus = 1;
3222 		cs += 1;
3223 	}
3224 	if ((*cs < '0') || (*cs > '9')) {
3225 		goto bail_out;
3226 	}
3227 	n = 0;
3228 	while ((*cs >= '0') && (*cs <= '9') && (n <= cut)) {
3229 		n = n*10 + *cs++ - '0';
3230 	}
3231 	if (minus)
3232 	    n = -n;
3233 	for (;;) {
3234 		switch (*cs++) {
3235 		case 'k':
3236 			if (flags & ALLOW_END_ONLY)
3237 				goto bail_out;
3238 			if (n > (BIG / 1024))
3239 				goto overflow;
3240 			n *= 1024;
3241 			continue;
3242 
3243 		case '*':
3244 		case 'x':
3245 			if (flags & ALLOW_END_ONLY)
3246 				goto bail_out;
3247 			string = cs;
3248 			t = number(d_value, param, flags);
3249 			if (n > (BIG / t))
3250 				goto overflow;
3251 			n *= t;
3252 			cs = string + 1; /* adjust for -- below */
3253 
3254 			/* recursion has read rest of expression */
3255 			/* FALLTHROUGH */
3256 
3257 		case ',':
3258 		case '\0':
3259 			cs--;
3260 			string = cs;
3261 			return (n);
3262 
3263 		case '%':
3264 			if (flags & ALLOW_END_ONLY)
3265 				goto bail_out;
3266 			if (flags & ALLOW_PERCENT) {
3267 				flags &= ~ALLOW_PERCENT;
3268 				flags |= ALLOW_END_ONLY;
3269 				continue;
3270 			}
3271 			goto bail_out;
3272 
3273 		case 'm':
3274 			if (flags & ALLOW_END_ONLY)
3275 				goto bail_out;
3276 			if (flags & ALLOW_MS1) {
3277 				flags &= ~ALLOW_MS1;
3278 				flags |= ALLOW_MS2;
3279 				continue;
3280 			}
3281 			goto bail_out;
3282 
3283 		case 's':
3284 			if (flags & ALLOW_END_ONLY)
3285 				goto bail_out;
3286 			if (flags & ALLOW_MS2) {
3287 				flags &= ~ALLOW_MS2;
3288 				flags |= ALLOW_END_ONLY;
3289 				continue;
3290 			}
3291 			goto bail_out;
3292 
3293 		case '0': case '1': case '2': case '3': case '4':
3294 		case '5': case '6': case '7': case '8': case '9':
3295 overflow:
3296 			(void) fprintf(stderr,
3297 			    gettext("mkfs: value for %s overflowed\n"),
3298 			    param);
3299 			while ((*cs != '\0') && (*cs != ','))
3300 				cs++;
3301 			string = cs;
3302 			return (BIG);
3303 
3304 		default:
3305 bail_out:
3306 			(void) fprintf(stderr, gettext(
3307 			    "mkfs: bad numeric arg for %s: \"%s\"\n"),
3308 			    param, string);
3309 			while ((*cs != '\0') && (*cs != ','))
3310 				cs++;
3311 			string = cs;
3312 			if (d_value != NO_DEFAULT) {
3313 				(void) fprintf(stderr,
3314 				    gettext("mkfs: %s reset to default %lld\n"),
3315 				    param, d_value);
3316 				return (d_value);
3317 			}
3318 			lockexit(2);
3319 
3320 		}
3321 	} /* never gets here */
3322 }
3323 
3324 /* match ************************************************************** */
3325 /*									*/
3326 /* Compare two text strings for equality				*/
3327 /*									*/
3328 /* Arg:	 s - pointer to string to match with a command arg		*/
3329 /* Global arg:  string - pointer to command arg				*/
3330 /*									*/
3331 /* Return:	1 if match, 0 if no match				*/
3332 /*		If match, also reset `string' to point to the text	*/
3333 /*		that follows the matching text.				*/
3334 /*									*/
3335 /* ******************************************************************** */
3336 
3337 static int
3338 match(char *s)
3339 {
3340 	char *cs;
3341 
3342 	cs = string;
3343 	while (*cs++ == *s) {
3344 		if (*s++ == '\0') {
3345 			goto true;
3346 		}
3347 	}
3348 	if (*s != '\0') {
3349 		return (0);
3350 	}
3351 
3352 true:
3353 	cs--;
3354 	string = cs;
3355 	return (1);
3356 }
3357 
3358 /*
3359  * GROWFS ROUTINES
3360  */
3361 
3362 /* ARGSUSED */
3363 void
3364 lockexit(int exitstatus)
3365 {
3366 	if (Pflag) {
3367 		/* the probe mode neither changes nor locks the filesystem */
3368 		exit(exitstatus);
3369 	}
3370 
3371 	/*
3372 	 * flush the dirty cylinder group
3373 	 */
3374 	if (inlockexit == 0) {
3375 		inlockexit = 1;
3376 		flcg();
3377 	}
3378 
3379 	if (aio_inited) {
3380 		flush_writes();
3381 	}
3382 
3383 	/*
3384 	 * make sure the file system is unlocked before exiting
3385 	 */
3386 	if ((inlockexit == 1) && (!isbad)) {
3387 		inlockexit = 2;
3388 		ulockfs();
3389 		/*
3390 		 * if logging was enabled, then re-enable it
3391 		 */
3392 		if (waslog) {
3393 			if (rl_log_control(fsys, _FIOLOGENABLE) != RL_SUCCESS) {
3394 				(void) fprintf(stderr, gettext(
3395 					"failed to re-enable logging\n"));
3396 			}
3397 		}
3398 	} else if (grow) {
3399 		if (isbad) {
3400 			(void) fprintf(stderr, gettext(
3401 				"Filesystem is currently inconsistent.  It "
3402 				"must be repaired with fsck(1M)\nbefore being "
3403 				"used.  Use the following command to "
3404 				"do this:\n\n\tfsck %s\n\n"),
3405 					fsys);
3406 
3407 			if (ismounted) {
3408 				(void) fprintf(stderr, gettext(
3409 					"You will be told that the filesystem "
3410 					"is already mounted, and asked if you\n"
3411 					"wish to continue.  Answer `yes' to "
3412 					"this question.\n\n"));
3413 			}
3414 
3415 			(void) fprintf(stderr, gettext(
3416 					"One problem should be reported, that "
3417 					"the summary information is bad.\n"
3418 					"You will then be asked if it "
3419 					"should be salvaged.  Answer `yes' "
3420 					"to\nthis question.\n\n"));
3421 		}
3422 
3423 		if (ismounted) {
3424 			/*
3425 			 * In theory, there's no way to get here without
3426 			 * isbad also being set, but be robust in the
3427 			 * face of future code changes.
3428 			 */
3429 			(void) fprintf(stderr, gettext(
3430 				"The filesystem is currently mounted "
3431 				"read-only and write-locked.  "));
3432 			if (isbad) {
3433 				(void) fprintf(stderr, gettext(
3434 					"After\nrunning fsck, unlock the "
3435 					"filesystem and "));
3436 			} else {
3437 				(void) fprintf(stderr, gettext(
3438 					"Unlock the filesystem\nand "));
3439 			}
3440 
3441 			(void) fprintf(stderr, gettext(
3442 				"re-enable writing with\nthe following "
3443 				"command:\n\n\tlockfs -u %s\n\n"),
3444 					directory);
3445 		}
3446 	}
3447 
3448 	exit(exitstatus);
3449 }
3450 
3451 void
3452 randomgeneration()
3453 {
3454 	int		 i;
3455 	struct dinode	*dp;
3456 
3457 	/*
3458 	 * always perform fsirand(1) function... newfs will notice that
3459 	 * the inodes have been randomized and will not call fsirand itself
3460 	 */
3461 	for (i = 0, dp = zino; i < sblock.fs_inopb; ++i, ++dp)
3462 		IRANDOMIZE(&dp->di_ic);
3463 }
3464 
3465 /*
3466  * Check the size of the summary information.
3467  * Fields in sblock are not changed in this function.
3468  *
3469  * For an 8K filesystem block, the maximum number of cylinder groups is 16384.
3470  *     MAXCSBUFS {32}  *   8K  {FS block size}
3471  *                         divided by (sizeof csum) {16}
3472  *
3473  * Note that MAXCSBUFS is not used in the kernel; as of Solaris 2.6 build 32,
3474  * this is the only place where it's referenced.
3475  */
3476 void
3477 checksummarysize()
3478 {
3479 	diskaddr_t	dmax;
3480 	diskaddr_t	dmin;
3481 	int64_t	cg0frags;
3482 	int64_t	cg0blocks;
3483 	int64_t	maxncg;
3484 	int64_t	maxfrags;
3485 	uint64_t	fs_size;
3486 	uint64_t maxfs_blocks; /* filesystem blocks for max filesystem size */
3487 
3488 	/*
3489 	 * compute the maximum summary info size
3490 	 */
3491 	dmin = cgdmin(&sblock, 0);
3492 	dmax = cgbase(&sblock, 0) + sblock.fs_fpg;
3493 	fs_size = (grow) ? grow_fs_size : sblock.fs_size;
3494 	if (dmax > fs_size)
3495 		dmax = fs_size;
3496 	cg0frags  = dmax - dmin;
3497 	cg0blocks = cg0frags / sblock.fs_frag;
3498 	cg0frags = cg0blocks * sblock.fs_frag;
3499 	maxncg   = (longlong_t)cg0blocks *
3500 	    (longlong_t)(sblock.fs_bsize / sizeof (struct csum));
3501 
3502 	maxfs_blocks = FS_MAX;
3503 
3504 	if (maxncg > ((longlong_t)maxfs_blocks / (longlong_t)sblock.fs_fpg) + 1)
3505 		maxncg = ((longlong_t)maxfs_blocks /
3506 		    (longlong_t)sblock.fs_fpg) + 1;
3507 
3508 	maxfrags = maxncg * (longlong_t)sblock.fs_fpg;
3509 
3510 	if (maxfrags > maxfs_blocks)
3511 		maxfrags = maxfs_blocks;
3512 
3513 
3514 	/*
3515 	 * remember for later processing in extendsummaryinfo()
3516 	 */
3517 	if (test)
3518 		grow_sifrag = dmin + (cg0blocks * sblock.fs_frag);
3519 	if (testfrags == 0)
3520 		testfrags = cg0frags;
3521 	if (testforce)
3522 		if (testfrags > cg0frags) {
3523 			(void) fprintf(stderr,
3524 				gettext("Too many test frags (%lld); "
3525 				"try %lld\n"), testfrags, cg0frags);
3526 			lockexit(32);
3527 		}
3528 
3529 	/*
3530 	 * if summary info is too large (too many cg's) tell the user and exit
3531 	 */
3532 	if ((longlong_t)sblock.fs_size > maxfrags) {
3533 		(void) fprintf(stderr, gettext(
3534 		    "Too many cylinder groups with %llu sectors;\n    try "
3535 		    "increasing cgsize, or decreasing fssize to %llu\n"),
3536 		    fsbtodb(&sblock, (uint64_t)sblock.fs_size),
3537 		    fsbtodb(&sblock, (uint64_t)maxfrags));
3538 		lockexit(32);
3539 	}
3540 }
3541 
3542 void
3543 checksblock()
3544 {
3545 	/*
3546 	 * make sure this is a file system
3547 	 */
3548 	if ((sblock.fs_magic != FS_MAGIC) &&
3549 	    (sblock.fs_magic != MTB_UFS_MAGIC)) {
3550 		(void) fprintf(stderr,
3551 			gettext("Bad superblock; magic number wrong\n"));
3552 		lockexit(32);
3553 	}
3554 
3555 	if (sblock.fs_magic == MTB_UFS_MAGIC &&
3556 	    sblock.fs_version > MTB_UFS_VERSION_1) {
3557 		(void) fprintf(stderr,
3558 			gettext("Unrecognized version of UFS\n"));
3559 		lockexit(32);
3560 	}
3561 
3562 	if (sblock.fs_ncg < 1) {
3563 		(void) fprintf(stderr,
3564 		    gettext("Bad superblock; ncg out of range\n"));
3565 		lockexit(32);
3566 	}
3567 	if (sblock.fs_cpg < 1) {
3568 		(void) fprintf(stderr,
3569 		    gettext("Bad superblock; cpg out of range\n"));
3570 		lockexit(32);
3571 	}
3572 	if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl ||
3573 	    (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl) {
3574 		(void) fprintf(stderr,
3575 		    gettext("Bad superblock; ncyl out of range\n"));
3576 		lockexit(32);
3577 	}
3578 	if (sblock.fs_sbsize <= 0 || sblock.fs_sbsize > sblock.fs_bsize) {
3579 		(void) fprintf(stderr, gettext(
3580 			"Bad superblock; superblock size out of range\n"));
3581 		lockexit(32);
3582 	}
3583 }
3584 
3585 /*
3586  * Roll the embedded log, if any, and set up the global variables
3587  * islog, islogok and isufslog.
3588  */
3589 static void
3590 logsetup(char *devstr)
3591 {
3592 	void		*buf, *ud_buf;
3593 	extent_block_t	*ebp;
3594 	ml_unit_t	*ul;
3595 	ml_odunit_t	*ud;
3596 
3597 	/*
3598 	 * Does the superblock indicate that we are supposed to have a log ?
3599 	 */
3600 	if (sblock.fs_logbno == 0) {
3601 		/*
3602 		 * No log present, nothing to do.
3603 		 */
3604 		islogok = 0;
3605 		islog = 0;
3606 		isufslog = 0;
3607 		return;
3608 	} else {
3609 		/*
3610 		 * There's a log in a yet unknown state, attempt to roll it.
3611 		 */
3612 		islog = 1;
3613 		islogok = 0;
3614 		isufslog = 0;
3615 
3616 		/*
3617 		 * We failed to roll the log, bail out.
3618 		 */
3619 		if (rl_roll_log(devstr) != RL_SUCCESS)
3620 			return;
3621 
3622 		isufslog = 1;
3623 
3624 		/* log is not okay; check the fs */
3625 		if ((FSOKAY != (sblock.fs_state + sblock.fs_time)) ||
3626 		    (sblock.fs_clean != FSLOG))
3627 			return;
3628 
3629 		/* get the log allocation block */
3630 		buf = (void *)malloc(DEV_BSIZE);
3631 		if (buf == (void *) NULL)
3632 			return;
3633 
3634 		ud_buf = (void *)malloc(DEV_BSIZE);
3635 		if (ud_buf == (void *) NULL) {
3636 			free(buf);
3637 			return;
3638 		}
3639 
3640 		rdfs((diskaddr_t)logbtodb(&sblock, sblock.fs_logbno),
3641 		    DEV_BSIZE, buf);
3642 		ebp = (extent_block_t *)buf;
3643 
3644 		/* log allocation block is not okay; check the fs */
3645 		if (ebp->type != LUFS_EXTENTS) {
3646 			free(buf);
3647 			free(ud_buf);
3648 			return;
3649 		}
3650 
3651 		/* get the log state block(s) */
3652 		rdfs((diskaddr_t)logbtodb(&sblock, ebp->extents[0].pbno),
3653 		    DEV_BSIZE, ud_buf);
3654 		ud = (ml_odunit_t *)ud_buf;
3655 		ul = (ml_unit_t *)malloc(sizeof (*ul));
3656 		ul->un_ondisk = *ud;
3657 
3658 		/* log state is okay */
3659 		if ((ul->un_chksum == ul->un_head_ident + ul->un_tail_ident) &&
3660 		    (ul->un_version == LUFS_VERSION_LATEST) &&
3661 		    (ul->un_badlog == 0))
3662 			islogok = 1;
3663 		free(ud_buf);
3664 		free(buf);
3665 		free(ul);
3666 	}
3667 }
3668 
3669 void
3670 growinit(char *devstr)
3671 {
3672 	int	i;
3673 	char	buf[DEV_BSIZE];
3674 
3675 	/*
3676 	 * Read and verify the superblock
3677 	 */
3678 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
3679 	checksblock();
3680 	if (sblock.fs_postblformat != FS_DYNAMICPOSTBLFMT) {
3681 		(void) fprintf(stderr,
3682 			gettext("old file system format; can't growfs\n"));
3683 		lockexit(32);
3684 	}
3685 
3686 	/*
3687 	 * can't shrink a file system
3688 	 */
3689 	grow_fssize = fsbtodb(&sblock, (uint64_t)sblock.fs_size);
3690 	if (fssize_db < grow_fssize) {
3691 		(void) fprintf(stderr,
3692 		    gettext("%lld sectors < current size of %lld sectors\n"),
3693 		    fssize_db, grow_fssize);
3694 		lockexit(32);
3695 	}
3696 
3697 	/*
3698 	 * can't grow a system to over a terabyte unless it was set up
3699 	 * as an MTB UFS file system.
3700 	 */
3701 	if (mtb == 'y' && sblock.fs_magic != MTB_UFS_MAGIC) {
3702 		if (fssize_db >= SECTORS_PER_TERABYTE) {
3703 			(void) fprintf(stderr, gettext(
3704 "File system was not set up with the multi-terabyte format.\n"));
3705 			(void) fprintf(stderr, gettext(
3706 "Its size cannot be increased to a terabyte or more.\n"));
3707 		} else {
3708 			(void) fprintf(stderr, gettext(
3709 "Cannot convert file system to multi-terabyte format.\n"));
3710 		}
3711 		lockexit(32);
3712 	}
3713 
3714 	logsetup(devstr);
3715 
3716 	/*
3717 	 * can't growfs when logging device has errors
3718 	 */
3719 	if ((islog && !islogok) ||
3720 	    ((FSOKAY == (sblock.fs_state + sblock.fs_time)) &&
3721 	    (sblock.fs_clean == FSLOG && !islog))) {
3722 		(void) fprintf(stderr,
3723 			gettext("logging device has errors; can't growfs\n"));
3724 		lockexit(32);
3725 	}
3726 
3727 	/*
3728 	 * disable ufs logging for growing
3729 	 */
3730 	if (isufslog) {
3731 		if (rl_log_control(devstr, _FIOLOGDISABLE) != RL_SUCCESS) {
3732 			(void) fprintf(stderr, gettext(
3733 				"failed to disable logging\n"));
3734 			lockexit(32);
3735 		}
3736 		islog = 0;
3737 		waslog = 1;
3738 	}
3739 
3740 	/*
3741 	 * if mounted write lock the file system to be grown
3742 	 */
3743 	if (ismounted)
3744 		wlockfs();
3745 
3746 	/*
3747 	 * refresh dynamic superblock state - disabling logging will have
3748 	 * changed the amount of free space available in the file system
3749 	 */
3750 	rdfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
3751 
3752 	/*
3753 	 * make sure device is big enough
3754 	 */
3755 	rdfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf);
3756 	wtfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf);
3757 
3758 	/*
3759 	 * read current summary information
3760 	 */
3761 	grow_fscs = read_summaryinfo(&sblock);
3762 
3763 	/*
3764 	 * save some current size related fields from the superblock
3765 	 * These are used in extendsummaryinfo()
3766 	 */
3767 	grow_fs_size	= sblock.fs_size;
3768 	grow_fs_ncg	= sblock.fs_ncg;
3769 	grow_fs_csaddr	= (diskaddr_t)sblock.fs_csaddr;
3770 	grow_fs_cssize	= sblock.fs_cssize;
3771 
3772 	/*
3773 	 * save and reset the clean flag
3774 	 */
3775 	if (FSOKAY == (sblock.fs_state + sblock.fs_time))
3776 		grow_fs_clean = sblock.fs_clean;
3777 	else
3778 		grow_fs_clean = FSBAD;
3779 	sblock.fs_clean = FSBAD;
3780 	sblock.fs_state = FSOKAY - sblock.fs_time;
3781 	isbad = 1;
3782 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
3783 }
3784 
3785 void
3786 checkdev(char *rdev, char *bdev)
3787 {
3788 	struct stat64	statarea;
3789 
3790 	if (stat64(bdev, &statarea) < 0) {
3791 		(void) fprintf(stderr, gettext("can't check mount point; "));
3792 		(void) fprintf(stderr, gettext("can't stat %s\n"), bdev);
3793 		lockexit(32);
3794 	}
3795 	if ((statarea.st_mode & S_IFMT) != S_IFBLK) {
3796 		(void) fprintf(stderr, gettext(
3797 		    "can't check mount point; %s is not a block device\n"),
3798 		    bdev);
3799 		lockexit(32);
3800 	}
3801 	if (stat64(rdev, &statarea) < 0) {
3802 		(void) fprintf(stderr, gettext("can't stat %s\n"), rdev);
3803 		lockexit(32);
3804 	}
3805 	if ((statarea.st_mode & S_IFMT) != S_IFCHR) {
3806 		(void) fprintf(stderr,
3807 			gettext("%s is not a character device\n"), rdev);
3808 		lockexit(32);
3809 	}
3810 }
3811 
3812 void
3813 checkmount(struct mnttab *mntp, char *bdevname)
3814 {
3815 	struct stat64	statdir;
3816 	struct stat64	statdev;
3817 
3818 	if (strcmp(bdevname, mntp->mnt_special) == 0) {
3819 		if (stat64(mntp->mnt_mountp, &statdir) == -1) {
3820 			(void) fprintf(stderr, gettext("can't stat %s\n"),
3821 				mntp->mnt_mountp);
3822 			lockexit(32);
3823 		}
3824 		if (stat64(mntp->mnt_special, &statdev) == -1) {
3825 			(void) fprintf(stderr, gettext("can't stat %s\n"),
3826 				mntp->mnt_special);
3827 			lockexit(32);
3828 		}
3829 		if (statdir.st_dev != statdev.st_rdev) {
3830 			(void) fprintf(stderr, gettext(
3831 				"%s is not mounted on %s; mnttab(4) wrong\n"),
3832 				mntp->mnt_special, mntp->mnt_mountp);
3833 			lockexit(32);
3834 		}
3835 		ismounted = 1;
3836 		if (directory) {
3837 			if (strcmp(mntp->mnt_mountp, directory) != 0) {
3838 				(void) fprintf(stderr,
3839 				gettext("%s is mounted on %s, not %s\n"),
3840 					bdevname, mntp->mnt_mountp, directory);
3841 				lockexit(32);
3842 			}
3843 		} else {
3844 			if (grow)
3845 				(void) fprintf(stderr, gettext(
3846 					"%s is mounted on %s; can't growfs\n"),
3847 					bdevname, mntp->mnt_mountp);
3848 			else
3849 				(void) fprintf(stderr,
3850 					gettext("%s is mounted, can't mkfs\n"),
3851 					bdevname);
3852 			lockexit(32);
3853 		}
3854 	}
3855 }
3856 
3857 struct dinode	*dibuf	= 0;
3858 diskaddr_t	difrag	= 0;
3859 
3860 struct dinode *
3861 gdinode(ino_t ino)
3862 {
3863 	/*
3864 	 * read the block of inodes containing inode number ino
3865 	 */
3866 	if (dibuf == 0)
3867 		dibuf = (struct dinode *)malloc((unsigned)sblock.fs_bsize);
3868 	if (itod(&sblock, ino) != difrag) {
3869 		difrag = itod(&sblock, ino);
3870 		rdfs(fsbtodb(&sblock, (uint64_t)difrag), (int)sblock.fs_bsize,
3871 			(char *)dibuf);
3872 	}
3873 	return (dibuf + (ino % INOPB(&sblock)));
3874 }
3875 
3876 /*
3877  * structure that manages the frags we need for extended summary info
3878  *	These frags can be:
3879  *		free
3880  *		data  block
3881  *		alloc block
3882  */
3883 struct csfrag {
3884 	struct csfrag	*next;		/* next entry */
3885 	daddr32_t	 ofrag;		/* old frag */
3886 	daddr32_t	 nfrag;		/* new frag */
3887 	long		 cylno;		/* cylno of nfrag */
3888 	long		 frags;		/* number of frags */
3889 	long		 size;		/* size in bytes */
3890 	ino_t		 ino;		/* inode number */
3891 	long		 fixed;		/* Boolean - Already fixed? */
3892 };
3893 struct csfrag	*csfrag;		/* state unknown */
3894 struct csfrag	*csfragino;		/* frags belonging to an inode */
3895 struct csfrag	*csfragfree;		/* frags that are free */
3896 
3897 daddr32_t maxcsfrag	= 0;		/* maximum in range */
3898 daddr32_t mincsfrag	= 0x7fffffff;	/* minimum in range */
3899 
3900 int
3901 csfraginrange(daddr32_t frag)
3902 {
3903 	return ((frag >= mincsfrag) && (frag <= maxcsfrag));
3904 }
3905 
3906 struct csfrag *
3907 findcsfrag(daddr32_t frag, struct csfrag **cfap)
3908 {
3909 	struct csfrag	*cfp;
3910 
3911 	if (!csfraginrange(frag))
3912 		return (NULL);
3913 
3914 	for (cfp = *cfap; cfp; cfp = cfp->next)
3915 		if (cfp->ofrag == frag)
3916 			return (cfp);
3917 	return (NULL);
3918 }
3919 
3920 void
3921 checkindirect(ino_t ino, daddr32_t *fragsp, daddr32_t frag, int level)
3922 {
3923 	int			i;
3924 	int			ne	= sblock.fs_bsize / sizeof (daddr32_t);
3925 	daddr32_t			fsb[MAXBSIZE / sizeof (daddr32_t)];
3926 
3927 	if (frag == 0)
3928 		return;
3929 
3930 	rdfs(fsbtodb(&sblock, frag), (int)sblock.fs_bsize,
3931 	    (char *)fsb);
3932 
3933 	checkdirect(ino, fragsp, fsb, sblock.fs_bsize / sizeof (daddr32_t));
3934 
3935 	if (level)
3936 		for (i = 0; i < ne && *fragsp; ++i)
3937 			checkindirect(ino, fragsp, fsb[i], level-1);
3938 }
3939 
3940 void
3941 addcsfrag(ino_t ino, daddr32_t frag, struct csfrag **cfap)
3942 {
3943 	struct csfrag	*cfp, *curr, *prev;
3944 
3945 	/*
3946 	 * establish a range for faster checking in csfraginrange()
3947 	 */
3948 	if (frag > maxcsfrag)
3949 		maxcsfrag = frag;
3950 	if (frag < mincsfrag)
3951 		mincsfrag = frag;
3952 
3953 	/*
3954 	 * if this frag belongs to an inode and is not the start of a block
3955 	 *	then see if it is part of a frag range for this inode
3956 	 */
3957 	if (ino && (frag % sblock.fs_frag))
3958 		for (cfp = *cfap; cfp; cfp = cfp->next) {
3959 			if (ino != cfp->ino)
3960 				continue;
3961 			if (frag != cfp->ofrag + cfp->frags)
3962 				continue;
3963 			cfp->frags++;
3964 			cfp->size += sblock.fs_fsize;
3965 			return;
3966 		}
3967 	/*
3968 	 * allocate a csfrag entry and insert it in an increasing order into the
3969 	 * specified list
3970 	 */
3971 	cfp = (struct csfrag *)calloc(1, sizeof (struct csfrag));
3972 	cfp->ino	= ino;
3973 	cfp->ofrag	= frag;
3974 	cfp->frags	= 1;
3975 	cfp->size	= sblock.fs_fsize;
3976 	for (prev = NULL, curr = *cfap; curr != NULL;
3977 		prev = curr, curr = curr->next) {
3978 		if (frag < curr->ofrag) {
3979 			cfp->next = curr;
3980 			if (prev)
3981 				prev->next = cfp;	/* middle element */
3982 			else
3983 				*cfap = cfp;		/* first element */
3984 			break;
3985 		}
3986 		if (curr->next == NULL) {
3987 			curr->next = cfp;		/* last element	*/
3988 			break;
3989 		}
3990 	}
3991 	if (*cfap == NULL)	/* will happen only once */
3992 		*cfap = cfp;
3993 }
3994 
3995 void
3996 delcsfrag(daddr32_t frag, struct csfrag **cfap)
3997 {
3998 	struct csfrag	*cfp;
3999 	struct csfrag	**cfpp;
4000 
4001 	/*
4002 	 * free up entry whose beginning frag matches
4003 	 */
4004 	for (cfpp = cfap; *cfpp; cfpp = &(*cfpp)->next) {
4005 		if (frag == (*cfpp)->ofrag) {
4006 			cfp = *cfpp;
4007 			*cfpp = (*cfpp)->next;
4008 			free((char *)cfp);
4009 			return;
4010 		}
4011 	}
4012 }
4013 
4014 /*
4015  * See whether any of the direct blocks in the array pointed by "db" and of
4016  * length "ne" are within the range of frags needed to extend the cylinder
4017  * summary. If so, remove those frags from the "as-yet-unclassified" list
4018  * (csfrag) and add them to the "owned-by-inode" list (csfragino).
4019  * For each such frag found, decrement the frag count pointed to by fragsp.
4020  * "ino" is the inode that contains (either directly or indirectly) the frags
4021  * being checked.
4022  */
4023 void
4024 checkdirect(ino_t ino, daddr32_t *fragsp, daddr32_t *db, int ne)
4025 {
4026 	int	 i;
4027 	int	 j;
4028 	int	 found;
4029 	diskaddr_t	 frag;
4030 
4031 	/*
4032 	 * scan for allocation within the new summary info range
4033 	 */
4034 	for (i = 0; i < ne && *fragsp; ++i) {
4035 		if ((frag = *db++) != 0) {
4036 			found = 0;
4037 			for (j = 0; j < sblock.fs_frag && *fragsp; ++j) {
4038 				if (found || (found = csfraginrange(frag))) {
4039 					addcsfrag(ino, frag, &csfragino);
4040 					delcsfrag(frag, &csfrag);
4041 				}
4042 				++frag;
4043 				--(*fragsp);
4044 			}
4045 		}
4046 	}
4047 }
4048 
4049 void
4050 findcsfragino()
4051 {
4052 	int		 i;
4053 	int		 j;
4054 	daddr32_t		 frags;
4055 	struct dinode	*dp;
4056 
4057 	/*
4058 	 * scan all old inodes looking for allocations in the new
4059 	 * summary info range.  Move the affected frag from the
4060 	 * generic csfrag list onto the `owned-by-inode' list csfragino.
4061 	 */
4062 	for (i = UFSROOTINO; i < grow_fs_ncg*sblock.fs_ipg && csfrag; ++i) {
4063 		dp = gdinode((ino_t)i);
4064 		switch (dp->di_mode & IFMT) {
4065 			case IFSHAD	:
4066 			case IFLNK 	:
4067 			case IFDIR 	:
4068 			case IFREG 	: break;
4069 			default		: continue;
4070 		}
4071 
4072 		frags   = dbtofsb(&sblock, dp->di_blocks);
4073 
4074 		checkdirect((ino_t)i, &frags, &dp->di_db[0], NDADDR+NIADDR);
4075 		for (j = 0; j < NIADDR && frags; ++j)
4076 			checkindirect((ino_t)i, &frags, dp->di_ib[j], j);
4077 	}
4078 }
4079 
4080 void
4081 fixindirect(daddr32_t frag, int level)
4082 {
4083 	int			 i;
4084 	int			 ne	= sblock.fs_bsize / sizeof (daddr32_t);
4085 	daddr32_t			fsb[MAXBSIZE / sizeof (daddr32_t)];
4086 
4087 	if (frag == 0)
4088 		return;
4089 
4090 	rdfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
4091 	    (char *)fsb);
4092 
4093 	fixdirect((caddr_t)fsb, frag, fsb, ne);
4094 
4095 	if (level)
4096 		for (i = 0; i < ne; ++i)
4097 			fixindirect(fsb[i], level-1);
4098 }
4099 
4100 void
4101 fixdirect(caddr_t bp, daddr32_t frag, daddr32_t *db, int ne)
4102 {
4103 	int	 i;
4104 	struct csfrag	*cfp;
4105 
4106 	for (i = 0; i < ne; ++i, ++db) {
4107 		if (*db == 0)
4108 			continue;
4109 		if ((cfp = findcsfrag(*db, &csfragino)) == NULL)
4110 			continue;
4111 		*db = cfp->nfrag;
4112 		cfp->fixed = 1;
4113 		wtfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
4114 		    bp);
4115 	}
4116 }
4117 
4118 void
4119 fixcsfragino()
4120 {
4121 	int		 i;
4122 	struct dinode	*dp;
4123 	struct csfrag	*cfp;
4124 
4125 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4126 		if (cfp->fixed)
4127 			continue;
4128 		dp = gdinode((ino_t)cfp->ino);
4129 		fixdirect((caddr_t)dibuf, difrag, dp->di_db, NDADDR+NIADDR);
4130 		for (i = 0; i < NIADDR; ++i)
4131 			fixindirect(dp->di_ib[i], i);
4132 	}
4133 }
4134 
4135 /*
4136  * Read the cylinders summary information specified by settings in the
4137  * passed 'fs' structure into a new allocated array of csum structures.
4138  * The caller is responsible for freeing the returned array.
4139  * Return a pointer to an array of csum structures.
4140  */
4141 static struct csum *
4142 read_summaryinfo(struct	fs *fsp)
4143 {
4144 	struct csum 	*csp;
4145 	int		i;
4146 
4147 	if ((csp = malloc((size_t)fsp->fs_cssize)) == NULL) {
4148 		(void) fprintf(stderr, gettext("cannot create csum list,"
4149 			" not enough memory\n"));
4150 		exit(32);
4151 	}
4152 
4153 	for (i = 0; i < fsp->fs_cssize; i += fsp->fs_bsize) {
4154 		rdfs(fsbtodb(fsp,
4155 			(uint64_t)(fsp->fs_csaddr + numfrags(fsp, i))),
4156 			(int)(fsp->fs_cssize - i < fsp->fs_bsize ?
4157 			fsp->fs_cssize - i : fsp->fs_bsize),
4158 			((caddr_t)csp) + i);
4159 	}
4160 
4161 	return (csp);
4162 }
4163 
4164 /*
4165  * Check the allocation of fragments that are to be made part of a csum block.
4166  * A fragment is allocated if it is either in the csfragfree list or, it is
4167  * in the csfragino list and has new frags associated with it.
4168  * Return the number of allocated fragments.
4169  */
4170 int64_t
4171 checkfragallocated(daddr32_t frag)
4172 {
4173 	struct 	csfrag	*cfp;
4174 	/*
4175 	 * Since the lists are sorted we can break the search if the asked
4176 	 * frag is smaller then the one in the list.
4177 	 */
4178 	for (cfp = csfragfree; cfp != NULL && frag >= cfp->ofrag;
4179 		cfp = cfp->next) {
4180 		if (frag == cfp->ofrag)
4181 			return (1);
4182 	}
4183 	for (cfp = csfragino; cfp != NULL && frag >= cfp->ofrag;
4184 		cfp = cfp->next) {
4185 		if (frag == cfp->ofrag && cfp->nfrag != 0)
4186 			return (cfp->frags);
4187 	}
4188 
4189 	return (0);
4190 }
4191 
4192 /*
4193  * Figure out how much the filesystem can be grown. The limiting factor is
4194  * the available free space needed to extend the cg summary info block.
4195  * The free space is determined in three steps:
4196  * - Try to extend the cg summary block to the required size.
4197  * - Find free blocks in last cg.
4198  * - Find free space in the last already allocated fragment of the summary info
4199  *   block, and use it for additional csum structures.
4200  * Return the maximum size of the new filesystem or 0 if it can't be grown.
4201  * Please note that this function leaves the global list pointers csfrag,
4202  * csfragfree, and csfragino initialized, and the caller is responsible for
4203  * freeing the lists.
4204  */
4205 diskaddr_t
4206 probe_summaryinfo()
4207 {
4208 	/* fragments by which the csum block can be extended. */
4209 	int64_t 	growth_csum_frags = 0;
4210 	/* fragments by which the filesystem can be extended. */
4211 	int64_t		growth_fs_frags = 0;
4212 	int64_t		new_fs_cssize;	/* size of csum blk in the new FS */
4213 	int64_t		new_fs_ncg;	/* number of cg in the new FS */
4214 	int64_t 	spare_csum;
4215 	daddr32_t	oldfrag_daddr;
4216 	daddr32_t	newfrag_daddr;
4217 	daddr32_t	daddr;
4218 	int		i;
4219 
4220 	/*
4221 	 * read and verify the superblock
4222 	 */
4223 	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
4224 	checksblock();
4225 
4226 	/*
4227 	 * check how much we can extend the cg summary info block
4228 	 */
4229 
4230 	/*
4231 	 * read current summary information
4232 	 */
4233 	fscs = read_summaryinfo(&sblock);
4234 
4235 	/*
4236 	 * build list of frags needed for cg summary info block extension
4237 	 */
4238 	oldfrag_daddr = howmany(sblock.fs_cssize, sblock.fs_fsize) +
4239 		sblock.fs_csaddr;
4240 	new_fs_ncg = howmany(dbtofsb(&sblock, fssize_db), sblock.fs_fpg);
4241 	new_fs_cssize = fragroundup(&sblock, new_fs_ncg * sizeof (struct csum));
4242 	newfrag_daddr = howmany(new_fs_cssize, sblock.fs_fsize) +
4243 		sblock.fs_csaddr;
4244 	/*
4245 	 * add all of the frags that are required to grow the cyl summary to the
4246 	 * csfrag list, which is the generic/unknown list, since at this point
4247 	 * we don't yet know the state of those frags.
4248 	 */
4249 	for (daddr = oldfrag_daddr; daddr < newfrag_daddr; daddr++)
4250 		addcsfrag((ino_t)0, daddr, &csfrag);
4251 
4252 	/*
4253 	 * filter free fragments and allocate them. Note that the free frags
4254 	 * must be allocated first otherwise they could be grabbed by
4255 	 * alloccsfragino() for data frags.
4256 	 */
4257 	findcsfragfree();
4258 	alloccsfragfree();
4259 
4260 	/*
4261 	 * filter fragments owned by inodes and allocate them
4262 	 */
4263 	grow_fs_ncg = sblock.fs_ncg; /* findcsfragino() needs this glob. var. */
4264 	findcsfragino();
4265 	alloccsfragino();
4266 
4267 	if (notenoughspace()) {
4268 		/*
4269 		 * check how many consecutive fragments could be allocated
4270 		 * in both lists.
4271 		 */
4272 		int64_t tmp_frags;
4273 		for (daddr = oldfrag_daddr; daddr < newfrag_daddr;
4274 			daddr += tmp_frags) {
4275 			if ((tmp_frags = checkfragallocated(daddr)) > 0)
4276 				growth_csum_frags += tmp_frags;
4277 			else
4278 				break;
4279 		}
4280 	} else {
4281 		/*
4282 		 * We have all we need for the new desired size,
4283 		 * so clean up and report back.
4284 		 */
4285 		return (fssize_db);
4286 	}
4287 
4288 	/*
4289 	 * given the number of fragments by which the csum block can be grown
4290 	 * compute by how many new fragments the FS can be increased.
4291 	 * It is the number of csum instances per fragment multiplied by
4292 	 * `growth_csum_frags' and the number of fragments per cylinder group.
4293 	 */
4294 	growth_fs_frags = howmany(sblock.fs_fsize, sizeof (struct csum)) *
4295 		growth_csum_frags * sblock.fs_fpg;
4296 
4297 	/*
4298 	 * compute free fragments in the last cylinder group
4299 	 */
4300 	rdcg(sblock.fs_ncg - 1);
4301 	growth_fs_frags += sblock.fs_fpg - acg.cg_ndblk;
4302 
4303 	/*
4304 	 * compute how many csum instances are unused in the old csum block.
4305 	 * For each unused csum instance the FS can be grown by one cylinder
4306 	 * group without extending the csum block.
4307 	 */
4308 	spare_csum = howmany(sblock.fs_cssize, sizeof (struct csum)) -
4309 		sblock.fs_ncg;
4310 	if (spare_csum > 0)
4311 		growth_fs_frags += spare_csum * sblock.fs_fpg;
4312 
4313 	/*
4314 	 * recalculate the new filesystem size in sectors, shorten it by
4315 	 * the requested size `fssize_db' if necessary.
4316 	 */
4317 	if (growth_fs_frags > 0) {
4318 		diskaddr_t sect;
4319 		sect = (sblock.fs_size + growth_fs_frags) * sblock.fs_nspf;
4320 		return ((sect > fssize_db) ? fssize_db : sect);
4321 	}
4322 
4323 	return (0);
4324 }
4325 
4326 void
4327 extendsummaryinfo()
4328 {
4329 	int64_t		i;
4330 	int		localtest	= test;
4331 	int64_t		frags;
4332 	daddr32_t		oldfrag;
4333 	daddr32_t		newfrag;
4334 
4335 	/*
4336 	 * if no-write (-N), don't bother
4337 	 */
4338 	if (Nflag)
4339 		return;
4340 
4341 again:
4342 	flcg();
4343 	/*
4344 	 * summary info did not change size -- do nothing unless in test mode
4345 	 */
4346 	if (grow_fs_cssize == sblock.fs_cssize)
4347 		if (!localtest)
4348 			return;
4349 
4350 	/*
4351 	 * build list of frags needed for additional summary information
4352 	 */
4353 	oldfrag = howmany(grow_fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
4354 	newfrag = howmany(sblock.fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
4355 	/*
4356 	 * add all of the frags that are required to grow the cyl summary to the
4357 	 * csfrag list, which is the generic/unknown list, since at this point
4358 	 * we don't yet know the state of those frags.
4359 	 */
4360 	for (i = oldfrag, frags = 0; i < newfrag; ++i, ++frags)
4361 		addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
4362 	/*
4363 	 * reduce the number of data blocks in the file system (fs_dsize) by
4364 	 * the number of frags that need to be added to the cyl summary
4365 	 */
4366 	sblock.fs_dsize -= (newfrag - oldfrag);
4367 
4368 	/*
4369 	 * In test mode, we move more data than necessary from
4370 	 * cylinder group 0.  The lookup/allocate/move code can be
4371 	 * better stressed without having to create HUGE file systems.
4372 	 */
4373 	if (localtest)
4374 		for (i = newfrag; i < grow_sifrag; ++i) {
4375 			if (frags >= testfrags)
4376 				break;
4377 			frags++;
4378 			addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
4379 		}
4380 
4381 	/*
4382 	 * move frags to free or inode lists, depending on owner
4383 	 */
4384 	findcsfragfree();
4385 	findcsfragino();
4386 
4387 	/*
4388 	 * if not all frags can be located, file system must be inconsistent
4389 	 */
4390 	if (csfrag) {
4391 		isbad = 1;	/* should already be set, but make sure */
4392 		lockexit(32);
4393 	}
4394 
4395 	/*
4396 	 * allocate the free frags. Note that the free frags must be allocated
4397 	 * first otherwise they could be grabbed by alloccsfragino() for data
4398 	 * frags.
4399 	 */
4400 	alloccsfragfree();
4401 	/*
4402 	 * allocate extra space for inode frags
4403 	 */
4404 	alloccsfragino();
4405 
4406 	/*
4407 	 * not enough space
4408 	 */
4409 	if (notenoughspace()) {
4410 		unalloccsfragfree();
4411 		unalloccsfragino();
4412 		if (localtest && !testforce) {
4413 			localtest = 0;
4414 			goto again;
4415 		}
4416 		(void) fprintf(stderr, gettext("Not enough free space\n"));
4417 		lockexit(NOTENOUGHSPACE);
4418 	}
4419 
4420 	/*
4421 	 * copy the data from old frags to new frags
4422 	 */
4423 	copycsfragino();
4424 
4425 	/*
4426 	 * fix the inodes to point to the new frags
4427 	 */
4428 	fixcsfragino();
4429 
4430 	/*
4431 	 * We may have moved more frags than we needed.  Free them.
4432 	 */
4433 	rdcg((long)0);
4434 	for (i = newfrag; i <= maxcsfrag; ++i)
4435 		setbit(cg_blksfree(&acg), i-cgbase(&sblock, 0));
4436 	wtcg();
4437 
4438 	flcg();
4439 }
4440 
4441 /*
4442  * Check if all fragments in the `csfragino' list were reallocated.
4443  */
4444 int
4445 notenoughspace()
4446 {
4447 	struct csfrag	*cfp;
4448 
4449 	/*
4450 	 * If any element in the csfragino array has a "new frag location"
4451 	 * of 0, the allocfrags() function was unsuccessful in allocating
4452 	 * space for moving the frag represented by this array element.
4453 	 */
4454 	for (cfp = csfragino; cfp; cfp = cfp->next)
4455 		if (cfp->nfrag == 0)
4456 			return (1);
4457 	return (0);
4458 }
4459 
4460 void
4461 unalloccsfragino()
4462 {
4463 	struct csfrag	*cfp;
4464 
4465 	while ((cfp = csfragino) != NULL) {
4466 		if (cfp->nfrag)
4467 			freefrags(cfp->nfrag, cfp->frags, cfp->cylno);
4468 		delcsfrag(cfp->ofrag, &csfragino);
4469 	}
4470 }
4471 
4472 void
4473 unalloccsfragfree()
4474 {
4475 	struct csfrag	*cfp;
4476 
4477 	while ((cfp = csfragfree) != NULL) {
4478 		freefrags(cfp->ofrag, cfp->frags, cfp->cylno);
4479 		delcsfrag(cfp->ofrag, &csfragfree);
4480 	}
4481 }
4482 
4483 /*
4484  * For each frag in the "as-yet-unclassified" list (csfrag), see if
4485  * it's free (i.e., its bit is set in the free frag bit map).  If so,
4486  * move it from the "as-yet-unclassified" list to the csfragfree list.
4487  */
4488 void
4489 findcsfragfree()
4490 {
4491 	struct csfrag	*cfp;
4492 	struct csfrag	*cfpnext;
4493 
4494 	/*
4495 	 * move free frags onto the free-frag list
4496 	 */
4497 	rdcg((long)0);
4498 	for (cfp = csfrag; cfp; cfp = cfpnext) {
4499 		cfpnext = cfp->next;
4500 		if (isset(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0))) {
4501 			addcsfrag(cfp->ino, cfp->ofrag, &csfragfree);
4502 			delcsfrag(cfp->ofrag, &csfrag);
4503 		}
4504 	}
4505 }
4506 
4507 void
4508 copycsfragino()
4509 {
4510 	struct csfrag	*cfp;
4511 	char		buf[MAXBSIZE];
4512 
4513 	/*
4514 	 * copy data from old frags to newly allocated frags
4515 	 */
4516 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4517 		rdfs(fsbtodb(&sblock, (uint64_t)cfp->ofrag), (int)cfp->size,
4518 		    buf);
4519 		wtfs(fsbtodb(&sblock, (uint64_t)cfp->nfrag), (int)cfp->size,
4520 		    buf);
4521 	}
4522 }
4523 
4524 long	curcylno	= -1;
4525 int	cylnodirty	= 0;
4526 
4527 void
4528 rdcg(long cylno)
4529 {
4530 	if (cylno != curcylno) {
4531 		flcg();
4532 		curcylno = cylno;
4533 		rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
4534 			(int)sblock.fs_cgsize, (char *)&acg);
4535 	}
4536 }
4537 
4538 void
4539 flcg()
4540 {
4541 	if (cylnodirty) {
4542 		if (debug && Pflag) {
4543 			(void) fprintf(stderr,
4544 				"Assert: cylnodirty set in probe mode\n");
4545 			return;
4546 		}
4547 		resetallocinfo();
4548 		wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
4549 			(int)sblock.fs_cgsize, (char *)&acg);
4550 		cylnodirty = 0;
4551 	}
4552 	curcylno = -1;
4553 }
4554 
4555 void
4556 wtcg()
4557 {
4558 	if (!Pflag) {
4559 		/* probe mode should never write to disk */
4560 		cylnodirty = 1;
4561 	}
4562 }
4563 
4564 void
4565 allocfrags(long frags, daddr32_t *fragp, long *cylnop)
4566 {
4567 	int	 i;
4568 	int	 j;
4569 	long	 bits;
4570 	long	 bit;
4571 
4572 	/*
4573 	 * Allocate a free-frag range in an old cylinder group
4574 	 */
4575 	for (i = 0, *fragp = 0; i < grow_fs_ncg; ++i) {
4576 		if (((fscs+i)->cs_nffree < frags) && ((fscs+i)->cs_nbfree == 0))
4577 			continue;
4578 		rdcg((long)i);
4579 		bit = bits = 0;
4580 		while (findfreerange(&bit, &bits)) {
4581 			if (frags <= bits)  {
4582 				for (j = 0; j < frags; ++j)
4583 					clrbit(cg_blksfree(&acg), bit+j);
4584 				wtcg();
4585 				*cylnop = i;
4586 				*fragp  = bit + cgbase(&sblock, i);
4587 				return;
4588 			}
4589 			bit += bits;
4590 		}
4591 	}
4592 }
4593 
4594 /*
4595  * Allocate space for frags that need to be moved in order to free up space for
4596  * expanding the cylinder summary info.
4597  * For each frag that needs to be moved (each frag or range of frags in
4598  * the csfragino list), allocate a new location and store the frag number
4599  * of that new location in the nfrag field of the csfrag struct.
4600  * If a new frag can't be allocated for any element in the csfragino list,
4601  * set the new frag number for that element to 0 and return immediately.
4602  * The notenoughspace() function will detect this condition.
4603  */
4604 void
4605 alloccsfragino()
4606 {
4607 	struct csfrag	*cfp;
4608 
4609 	/*
4610 	 * allocate space for inode frag ranges
4611 	 */
4612 	for (cfp = csfragino; cfp; cfp = cfp->next) {
4613 		allocfrags(cfp->frags, &cfp->nfrag, &cfp->cylno);
4614 		if (cfp->nfrag == 0)
4615 			break;
4616 	}
4617 }
4618 
4619 void
4620 alloccsfragfree()
4621 {
4622 	struct csfrag	*cfp;
4623 
4624 	/*
4625 	 * allocate the free frags needed for extended summary info
4626 	 */
4627 	rdcg((long)0);
4628 
4629 	for (cfp = csfragfree; cfp; cfp = cfp->next)
4630 		clrbit(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0));
4631 
4632 	wtcg();
4633 }
4634 
4635 void
4636 freefrags(daddr32_t frag, long frags, long cylno)
4637 {
4638 	int	i;
4639 
4640 	/*
4641 	 * free frags
4642 	 */
4643 	rdcg(cylno);
4644 	for (i = 0; i < frags; ++i) {
4645 		setbit(cg_blksfree(&acg), (frag+i) - cgbase(&sblock, cylno));
4646 	}
4647 	wtcg();
4648 }
4649 
4650 int
4651 findfreerange(long *bitp, long *bitsp)
4652 {
4653 	long	 bit;
4654 
4655 	/*
4656 	 * find a range of free bits in a cylinder group bit map
4657 	 */
4658 	for (bit = *bitp, *bitsp = 0; bit < acg.cg_ndblk; ++bit)
4659 		if (isset(cg_blksfree(&acg), bit))
4660 			break;
4661 
4662 	if (bit >= acg.cg_ndblk)
4663 		return (0);
4664 
4665 	*bitp  = bit;
4666 	*bitsp = 1;
4667 	for (++bit; bit < acg.cg_ndblk; ++bit, ++(*bitsp)) {
4668 		if ((bit % sblock.fs_frag) == 0)
4669 			break;
4670 		if (isclr(cg_blksfree(&acg), bit))
4671 			break;
4672 	}
4673 	return (1);
4674 }
4675 
4676 void
4677 resetallocinfo()
4678 {
4679 	long	cno;
4680 	long	bit;
4681 	long	bits;
4682 
4683 	/*
4684 	 * Compute the free blocks/frags info and update the appropriate
4685 	 * inmemory superblock, summary info, and cylinder group fields
4686 	 */
4687 	sblock.fs_cstotal.cs_nffree -= acg.cg_cs.cs_nffree;
4688 	sblock.fs_cstotal.cs_nbfree -= acg.cg_cs.cs_nbfree;
4689 
4690 	acg.cg_cs.cs_nffree = 0;
4691 	acg.cg_cs.cs_nbfree = 0;
4692 
4693 	bzero((caddr_t)acg.cg_frsum, sizeof (acg.cg_frsum));
4694 	bzero((caddr_t)cg_blktot(&acg), (int)(acg.cg_iusedoff-acg.cg_btotoff));
4695 
4696 	bit = bits = 0;
4697 	while (findfreerange(&bit, &bits)) {
4698 		if (bits == sblock.fs_frag) {
4699 			acg.cg_cs.cs_nbfree++;
4700 			cno = cbtocylno(&sblock, bit);
4701 			cg_blktot(&acg)[cno]++;
4702 			cg_blks(&sblock, &acg, cno)[cbtorpos(&sblock, bit)]++;
4703 		} else {
4704 			acg.cg_cs.cs_nffree += bits;
4705 			acg.cg_frsum[bits]++;
4706 		}
4707 		bit += bits;
4708 	}
4709 
4710 	*(fscs + acg.cg_cgx) = acg.cg_cs;
4711 
4712 	sblock.fs_cstotal.cs_nffree += acg.cg_cs.cs_nffree;
4713 	sblock.fs_cstotal.cs_nbfree += acg.cg_cs.cs_nbfree;
4714 }
4715 
4716 void
4717 extendcg(long cylno)
4718 {
4719 	int	i;
4720 	diskaddr_t	dupper;
4721 	diskaddr_t	cbase;
4722 	diskaddr_t	dmax;
4723 
4724 	/*
4725 	 * extend the cylinder group at the end of the old file system
4726 	 * if it was partially allocated becase of lack of space
4727 	 */
4728 	flcg();
4729 	rdcg(cylno);
4730 
4731 	dupper = acg.cg_ndblk;
4732 	if (cylno == sblock.fs_ncg - 1)
4733 		acg.cg_ncyl = sblock.fs_ncyl % sblock.fs_cpg;
4734 	else
4735 		acg.cg_ncyl = sblock.fs_cpg;
4736 	cbase = cgbase(&sblock, cylno);
4737 	dmax = cbase + sblock.fs_fpg;
4738 	if (dmax > sblock.fs_size)
4739 		dmax = sblock.fs_size;
4740 	acg.cg_ndblk = dmax - cbase;
4741 
4742 	for (i = dupper; i < acg.cg_ndblk; ++i)
4743 		setbit(cg_blksfree(&acg), i);
4744 
4745 	sblock.fs_dsize += (acg.cg_ndblk - dupper);
4746 
4747 	wtcg();
4748 	flcg();
4749 }
4750 
4751 struct lockfs	lockfs;
4752 int		lockfd;
4753 int		islocked;
4754 int		lockfskey;
4755 char		lockfscomment[128];
4756 
4757 void
4758 ulockfs()
4759 {
4760 	/*
4761 	 * if the file system was locked, unlock it before exiting
4762 	 */
4763 	if (islocked == 0)
4764 		return;
4765 
4766 	/*
4767 	 * first, check if the lock held
4768 	 */
4769 	lockfs.lf_flags = LOCKFS_MOD;
4770 	if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) {
4771 		perror(directory);
4772 		lockexit(32);
4773 	}
4774 
4775 	if (LOCKFS_IS_MOD(&lockfs)) {
4776 		(void) fprintf(stderr,
4777 			gettext("FILE SYSTEM CHANGED DURING GROWFS!\n"));
4778 		(void) fprintf(stderr,
4779 			gettext("   See lockfs(1), umount(1), and fsck(1)\n"));
4780 		lockexit(32);
4781 	}
4782 	/*
4783 	 * unlock the file system
4784 	 */
4785 	lockfs.lf_lock  = LOCKFS_ULOCK;
4786 	lockfs.lf_flags = 0;
4787 	lockfs.lf_key   = lockfskey;
4788 	clockfs();
4789 	if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) {
4790 		perror(directory);
4791 		lockexit(32);
4792 	}
4793 }
4794 
4795 void
4796 wlockfs()
4797 {
4798 
4799 	/*
4800 	 * if no-write (-N), don't bother
4801 	 */
4802 	if (Nflag)
4803 		return;
4804 	/*
4805 	 * open the mountpoint, and write lock the file system
4806 	 */
4807 	if ((lockfd = open64(directory, O_RDONLY)) == -1) {
4808 		perror(directory);
4809 		lockexit(32);
4810 	}
4811 
4812 	/*
4813 	 * check if it is already locked
4814 	 */
4815 	if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) {
4816 		perror(directory);
4817 		lockexit(32);
4818 	}
4819 
4820 	if (lockfs.lf_lock != LOCKFS_WLOCK) {
4821 		lockfs.lf_lock  = LOCKFS_WLOCK;
4822 		lockfs.lf_flags = 0;
4823 		lockfs.lf_key   = 0;
4824 		clockfs();
4825 		if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) {
4826 			perror(directory);
4827 			lockexit(32);
4828 		}
4829 	}
4830 	islocked = 1;
4831 	lockfskey = lockfs.lf_key;
4832 }
4833 
4834 void
4835 clockfs()
4836 {
4837 	time_t	t;
4838 	char	*ct;
4839 
4840 	(void) time(&t);
4841 	ct = ctime(&t);
4842 	ct[strlen(ct)-1] = '\0';
4843 
4844 	(void) sprintf(lockfscomment, "%s -- mkfs pid %d", ct, getpid());
4845 	lockfs.lf_comlen  = strlen(lockfscomment)+1;
4846 	lockfs.lf_comment = lockfscomment;
4847 }
4848 
4849 /*
4850  * Write the csum records and the superblock
4851  */
4852 void
4853 wtsb()
4854 {
4855 	long	i;
4856 
4857 	/*
4858 	 * write summary information
4859 	 */
4860 	for (i = 0; i < sblock.fs_cssize; i += sblock.fs_bsize)
4861 		wtfs(fsbtodb(&sblock, (uint64_t)(sblock.fs_csaddr +
4862 			numfrags(&sblock, i))),
4863 			(int)(sblock.fs_cssize - i < sblock.fs_bsize ?
4864 			sblock.fs_cssize - i : sblock.fs_bsize),
4865 			((char *)fscs) + i);
4866 
4867 	/*
4868 	 * write superblock
4869 	 */
4870 	sblock.fs_time = mkfstime;
4871 	wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock);
4872 }
4873 
4874 /*
4875  * Verify that the optimization selection is reasonable, and advance
4876  * the global "string" appropriately.
4877  */
4878 static char
4879 checkopt(char *optim)
4880 {
4881 	char	opt;
4882 	int	limit = strcspn(optim, ",");
4883 
4884 	switch (limit) {
4885 	case 0:	/* missing indicator (have comma or nul) */
4886 		(void) fprintf(stderr, gettext(
4887 		    "mkfs: missing optimization flag reset to `t' (time)\n"));
4888 		opt = 't';
4889 		break;
4890 
4891 	case 1: /* single-character indicator */
4892 		opt = *optim;
4893 		if ((opt != 's') && (opt != 't')) {
4894 			(void) fprintf(stderr, gettext(
4895 		    "mkfs: bad optimization value `%c' reset to `t' (time)\n"),
4896 			    opt);
4897 			opt = 't';
4898 		}
4899 		break;
4900 
4901 	default: /* multi-character indicator */
4902 		(void) fprintf(stderr, gettext(
4903 	    "mkfs: bad optimization value `%*.*s' reset to `t' (time)\n"),
4904 		    limit, limit, optim);
4905 		opt = 't';
4906 		break;
4907 	}
4908 
4909 	string += limit;
4910 
4911 	return (opt);
4912 }
4913 
4914 /*
4915  * Verify that the mtb selection is reasonable, and advance
4916  * the global "string" appropriately.
4917  */
4918 static char
4919 checkmtb(char *mtbarg)
4920 {
4921 	char	mtbc;
4922 	int	limit = strcspn(mtbarg, ",");
4923 
4924 	switch (limit) {
4925 	case 0:	/* missing indicator (have comma or nul) */
4926 		(void) fprintf(stderr, gettext(
4927 		    "mkfs: missing mtb flag reset to `n' (no mtb support)\n"));
4928 		mtbc = 'n';
4929 		break;
4930 
4931 	case 1: /* single-character indicator */
4932 		mtbc = tolower(*mtbarg);
4933 		if ((mtbc != 'y') && (mtbc != 'n')) {
4934 			(void) fprintf(stderr, gettext(
4935 		    "mkfs: bad mtb value `%c' reset to `n' (no mtb support)\n"),
4936 			    mtbc);
4937 			mtbc = 'n';
4938 		}
4939 		break;
4940 
4941 	default: /* multi-character indicator */
4942 		(void) fprintf(stderr, gettext(
4943 	    "mkfs: bad mtb value `%*.*s' reset to `n' (no mtb support)\n"),
4944 		    limit, limit, mtbarg);
4945 		opt = 'n';
4946 		break;
4947 	}
4948 
4949 	string += limit;
4950 
4951 	return (mtbc);
4952 }
4953 
4954 /*
4955  * Verify that a value is in a range.  If it is not, resets it to
4956  * its default value if one is supplied, exits otherwise.
4957  *
4958  * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL.
4959  */
4960 static void
4961 range_check(long *varp, char *name, long minimum, long maximum,
4962     long def_val, int user_supplied)
4963 {
4964 	if ((*varp < minimum) || (*varp > maximum)) {
4965 		if (user_supplied != RC_DEFAULT) {
4966 			(void) fprintf(stderr, gettext(
4967 	    "mkfs: bad value for %s: %ld must be between %ld and %ld\n"),
4968 			    name, *varp, minimum, maximum);
4969 		}
4970 		if (def_val != NO_DEFAULT) {
4971 			if (user_supplied) {
4972 				(void) fprintf(stderr,
4973 				    gettext("mkfs: %s reset to default %ld\n"),
4974 				    name, def_val);
4975 			}
4976 			*varp = def_val;
4977 			return;
4978 		}
4979 		lockexit(2);
4980 		/*NOTREACHED*/
4981 	}
4982 }
4983 
4984 /*
4985  * Verify that a value is in a range.  If it is not, resets it to
4986  * its default value if one is supplied, exits otherwise.
4987  *
4988  * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL.
4989  */
4990 static void
4991 range_check_64(uint64_t *varp, char *name, uint64_t minimum, uint64_t maximum,
4992     uint64_t def_val, int user_supplied)
4993 {
4994 	if ((*varp < minimum) || (*varp > maximum)) {
4995 		if (user_supplied != RC_DEFAULT) {
4996 			(void) fprintf(stderr, gettext(
4997 	    "mkfs: bad value for %s: %lld must be between %lld and %lld\n"),
4998 			    name, *varp, minimum, maximum);
4999 		}
5000 		if (def_val != NO_DEFAULT) {
5001 			if (user_supplied) {
5002 				(void) fprintf(stderr,
5003 				    gettext("mkfs: %s reset to default %lld\n"),
5004 				    name, def_val);
5005 			}
5006 			*varp = def_val;
5007 			return;
5008 		}
5009 		lockexit(2);
5010 		/*NOTREACHED*/
5011 	}
5012 }
5013 
5014 /*
5015  * Blocks SIGINT from delivery.  Returns the previous mask in the
5016  * buffer provided, so that mask may be later restored.
5017  */
5018 static void
5019 block_sigint(sigset_t *old_mask)
5020 {
5021 	sigset_t block_mask;
5022 
5023 	if (sigemptyset(&block_mask) < 0) {
5024 		fprintf(stderr, gettext("Could not clear signal mask\n"));
5025 		lockexit(3);
5026 	}
5027 	if (sigaddset(&block_mask, SIGINT) < 0) {
5028 		fprintf(stderr, gettext("Could not set signal mask\n"));
5029 		lockexit(3);
5030 	}
5031 	if (sigprocmask(SIG_BLOCK, &block_mask, old_mask) < 0) {
5032 		fprintf(stderr, gettext("Could not block SIGINT\n"));
5033 		lockexit(3);
5034 	}
5035 }
5036 
5037 /*
5038  * Restores the signal mask that was in force before a call
5039  * to block_sigint().  This may actually still have SIGINT blocked,
5040  * if we've been recursively invoked.
5041  */
5042 static void
5043 unblock_sigint(sigset_t *old_mask)
5044 {
5045 	if (sigprocmask(SIG_UNBLOCK, old_mask, (sigset_t *)NULL) < 0) {
5046 		fprintf(stderr, gettext("Could not restore signal mask\n"));
5047 		lockexit(3);
5048 	}
5049 }
5050 
5051 /*
5052  * Attempt to be somewhat graceful about being interrupted, rather than
5053  * just silently leaving the filesystem in an unusable state.
5054  *
5055  * The kernel has blocked SIGINT upon entry, so we don't have to worry
5056  * about recursion if the user starts pounding on the keyboard.
5057  */
5058 static void
5059 recover_from_sigint(int signum)
5060 {
5061 	if (fso > -1) {
5062 		if ((Nflag != 0) || confirm_abort()) {
5063 			lockexit(4);
5064 		}
5065 	}
5066 }
5067 
/*
 * Ask on stdout whether the grow operation should really be cancelled
 * and read the answer from stdin.
 *
 * Returns 1 if the answer starts with `y' or `Y', or if end-of-file is
 * hit while reading it; returns 0 for any other answer.
 */
static int
confirm_abort(void)
{
	char answer[80];

	printf(gettext("\n\nAborting at this point will leave the filesystem "
		"in an inconsistent\nstate.  If you do choose to stop, "
		"you will be given instructions on how to\nrecover "
		"the filesystem.  Do you wish to cancel the filesystem "
		"grow\noperation (y/n)?"));

	/* no answer available: treat it as a yes */
	if (getline(stdin, answer, sizeof (answer)) == EOF)
		answer[0] = 'y';

	printf("\n");

	switch (answer[0]) {
	case 'y':
	case 'Y':
		return (1);
	default:
		return (0);
	}
}
5088 
/*
 * Read one line from "fp" into "loc", dropping all white space.
 *
 * Characters are consumed up to and including the next newline.  At most
 * maxlen - 1 non-space characters are stored; any further ones are read
 * and discarded.  The result is NUL terminated.
 *
 * Returns the number of characters stored, or EOF if end-of-file is
 * reached before a newline (the buffer is not terminated in that case).
 */
static int
getline(FILE *fp, char *loc, int maxlen)
{
	char	*out = loc;
	char	*limit = loc + (maxlen - 1);
	int	ch;

	for (;;) {
		ch = getc(fp);
		if (ch == '\n')
			break;
		if (ch == EOF)
			return (EOF);
		if (isspace(ch))
			continue;
		if (out < limit)
			*out++ = ch;
	}
	*out = 0;
	return (out - loc);
}
5106 
5107 /*
5108  * Calculate the maximum value of cylinders-per-group for a file
5109  * system with the characteristics:
5110  *
5111  *	bsize - file system block size
5112  *	fragsize - frag size
5113  *	nbpi - number of bytes of disk space per inode
5114  *	nrpos - number of rotational positions
5115  *	spc - sectors per cylinder
5116  *
5117  * These five characteristics are not adjustable (by this function).
5118  * The only attribute of the file system which IS adjusted by this
5119  * function in order to maximize cylinders-per-group is the proportion
5120  * of the cylinder group overhead block used for the inode map.  The
5121  * inode map cannot occupy more than one-third of the cylinder group
5122  * overhead block, but it's OK for it to occupy less than one-third
5123  * of the overhead block.
5124  *
5125  * The setting of nbpi determines one possible value for the maximum
5126  * size of a cylinder group.  It does so because it determines the total
5127  * number of inodes in the file system (file system size is fixed, and
5128  * nbpi is fixed, so the total number of inodes is fixed too).  The
5129  * cylinder group has to be small enough so that the number of inodes
5130  * in the cylinder group is less than or equal to the number of bits
5131  * in one-third (or whatever proportion is assumed) of a file system
5132  * block.  The details of the calculation are:
5133  *
5134  *     The macro MAXIpG_B(bsize, inode_divisor) determines the maximum
5135  *     number of inodes that can be in a cylinder group, given the
5136  *     proportion of the cylinder group overhead block used for the
5137  *     inode bitmaps (an inode_divisor of 3 means that 1/3 of the
5138  *     block is used for inode bitmaps; an inode_divisor of 12 means
5139  *     that 1/12 of the block is used for inode bitmaps.)
5140  *
5141  *     Once the number of inodes per cylinder group is known, the
5142  *     maximum value of cylinders-per-group (determined by nbpi)
5143  *     is calculated by the formula
5144  *
5145  *     maxcpg_given_nbpi = (size of a cylinder group)/(size of a cylinder)
5146  *
5147  *			 = (inodes-per-cg * nbpi)/(spc * DEV_BSIZE)
5148  *
5149  *     (Interestingly, the size of the file system never enters
5150  *     into this calculation.)
5151  *
5152  * Another possible value for the maximum cylinder group size is determined
5153  * by frag_size and nrpos.  The frags in the cylinder group must be
5154  * representable in the frag bitmaps in the cylinder overhead block and the
5155  * rotational positions for each cylinder must be represented in the
5156  * rotational position tables.  The calculation of the maximum cpg
 * value, given the frag and nrpos values, is:
5158  *
5159  *     maxcpg_given_fragsize =
5160  *	  (available space in the overhead block) / (size of per-cylinder data)
5161  *
5162  *     The available space in the overhead block =
5163  *	  bsize - sizeof (struct cg) - space_used_for_inode_bitmaps
5164  *
5165  *     The size of the per-cylinder data is:
5166  *	    sizeof(long)            # for the "blocks avail per cylinder" field
5167  *	    + nrpos * sizeof(short)   # for the rotational position table entry
5168  *	    + frags-per-cylinder/NBBY # number of bytes to represent this
5169  *				      # cylinder in the frag bitmap
5170  *
5171  * The two calculated maximum values of cylinder-per-group will typically
5172  * turn out to be different, since they are derived from two different
5173  * constraints.  Usually, maxcpg_given_nbpi is much bigger than
5174  * maxcpg_given_fragsize.  But they can be brought together by
5175  * adjusting the proportion of the overhead block dedicated to
5176  * the inode bitmaps.  Decreasing the proportion of the cylinder
5177  * group overhead block used for inode maps will decrease
5178  * maxcpg_given_nbpi and increase maxcpg_given_fragsize.
5179  *
5180  * This function calculates the initial values of maxcpg_given_nbpi
5181  * and maxcpg_given_fragsize assuming that 1/3 of the cg overhead
5182  * block is used for inode bitmaps.  Then it decreases the proportion
5183  * of the cg overhead block used for inode bitmaps (by increasing
5184  * the value of inode_divisor) until maxcpg_given_nbpi and
5185  * maxcpg_given_fragsize are the same, or stop changing, or
5186  * maxcpg_given_nbpi is less than maxcpg_given_fragsize.
5187  *
5188  * The loop terminates when any of the following occur:
5189  *	* maxcpg_given_fragsize is greater than or equal to
5190  *	  maxcpg_given_nbpi
5191  *	* neither maxcpg_given_fragsize nor maxcpg_given_nbpi
5192  *	  change in the expected direction
5193  *
5194  * The loop is guaranteed to terminate because it only continues
5195  * while maxcpg_given_fragsize and maxcpg_given_nbpi are approaching
 * each other.  As soon as they cross each other, or neither one changes
5197  * in the direction of the other, or one of them moves in the wrong
5198  * direction, the loop completes.
5199  */
5200 
5201 static long
5202 compute_maxcpg(long bsize, long fragsize, long nbpi, long nrpos, long spc)
5203 {
5204 	int	maxcpg_given_nbpi;	/* in cylinders */
5205 	int	maxcpg_given_fragsize;	/* in cylinders */
5206 	int	spf;			/* sectors per frag */
5207 	int	inode_divisor;
5208 	int	old_max_given_frag = 0;
5209 	int	old_max_given_nbpi = INT_MAX;
5210 
5211 	spf = fragsize / DEV_BSIZE;
5212 	inode_divisor = 3;
5213 
5214 	while (1) {
5215 		maxcpg_given_nbpi =
5216 		    (((int64_t)(MAXIpG_B(bsize, inode_divisor))) * nbpi) /
5217 		    (DEV_BSIZE * ((int64_t)spc));
5218 		maxcpg_given_fragsize =
5219 		    (bsize - (sizeof (struct cg)) - (bsize / inode_divisor)) /
5220 		    (sizeof (long) + nrpos * sizeof (short) +
5221 						(spc / spf) / NBBY);
5222 
5223 		if (maxcpg_given_fragsize >= maxcpg_given_nbpi)
5224 			return (maxcpg_given_nbpi);
5225 
5226 		/*
5227 		 * If neither value moves toward the other, return the
5228 		 * least of the old values (we use the old instead of the
5229 		 * new because: if the old is the same as the new, it
5230 		 * doesn't matter which ones we use.  If one of the
5231 		 * values changed, but in the wrong direction, the
5232 		 * new values are suspect.  Better use the old.  This
5233 		 * shouldn't happen, but it's best to check.
5234 		 */
5235 
5236 		if (!(maxcpg_given_nbpi < old_max_given_nbpi) &&
5237 		    !(maxcpg_given_fragsize > old_max_given_frag))
5238 			return (MIN(old_max_given_nbpi, old_max_given_frag));
5239 
5240 		/*
5241 		 * This is probably impossible, but if one of the maxcpg
5242 		 * values moved in the "right" direction and one moved
5243 		 * in the "wrong" direction (that is, the two values moved
5244 		 * in the same direction), the previous conditional won't
5245 		 * recognize that the values aren't converging (since at
5246 		 * least one value moved in the "right" direction, the
5247 		 * last conditional says "keep going").
5248 		 *
5249 		 * Just to make absolutely certain that the loop terminates,
5250 		 * check for one of the values moving in the "wrong" direction
5251 		 * and terminate the loop if it happens.
5252 		 */
5253 
5254 		if (maxcpg_given_nbpi > old_max_given_nbpi ||
5255 		    maxcpg_given_fragsize < old_max_given_frag)
5256 			return (MIN(old_max_given_nbpi, old_max_given_frag));
5257 
5258 		old_max_given_nbpi = maxcpg_given_nbpi;
5259 		old_max_given_frag = maxcpg_given_fragsize;
5260 
5261 		inode_divisor++;
5262 	}
5263 }
5264 
/*
 * Run "/usr/bin/isainfo -b" and inspect the first line it prints.
 *
 * Returns 1 when that line begins with "64"; returns 0 when it does not,
 * when nothing could be read, or when the command could not be started.
 */
static int
in_64bit_mode(void)
{
	/*  the command must be an absolute path, for security */
	char *isainfo_cmd = "/usr/bin/isainfo -b";
	char reply[BUFSIZ];
	FILE *pipe_fp;
	int is_64bit;

	/*
	 * Pin IFS to space and tab before the command is launched
	 * (presumably so an inherited IFS cannot alter how it is parsed).
	 */
	putenv("IFS= \t");

	pipe_fp = popen(isainfo_cmd, "r");
	if (pipe_fp == NULL)
		return (0);

	is_64bit = (fgets(reply, BUFSIZ, pipe_fp) != NULL &&
	    strncmp(reply, "64", 2) == 0);
	(void) pclose(pipe_fp);

	return (is_64bit);
}
5283 
5284 /*
5285  * validate_size
5286  *
5287  * Return 1 if the device appears to be at least "size" sectors long.
5288  * Return 0 if it's shorter or we can't read it.
5289  */
5290 
5291 static int
5292 validate_size(int fd, diskaddr_t size)
5293 {
5294 	char 		buf[DEV_BSIZE];
5295 	int rc;
5296 
5297 	if ((llseek(fd, (offset_t)((size - 1) * DEV_BSIZE), SEEK_SET) == -1) ||
5298 	    (read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)
5299 		rc = 0;
5300 	else
5301 		rc = 1;
5302 	return (rc);
5303 }
5304