xref: /titanic_52/usr/src/cmd/fs.d/ufs/newfs/newfs.c (revision 84ab085a13f931bc78e7415e7ce921dbaa14fcb3)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 #pragma ident	"%Z%%M%	%I%	%E% SMI"
24 	/* from UCB 5.2 9/11/85 */
25 
26 /*
27  * newfs: friendly front end to mkfs
28  *
29  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
30  * Use is subject to license terms.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/types.h>
35 #include <locale.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/fs/ufs_fs.h>
39 #include <sys/vnode.h>
40 #include <sys/fs/ufs_inode.h>
41 #include <sys/sysmacros.h>
42 
43 #include <errno.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <stdarg.h>
48 #include <stdio.h>
49 #include <fcntl.h>
50 #include <unistd.h>
51 #include <limits.h>
52 #include <libintl.h>
53 #include <sys/dkio.h>
54 #include <sys/vtoc.h>
55 #include <sys/mkdev.h>
56 #include <sys/efi_partition.h>
57 
58 #include <fslib.h>
59 
60 static unsigned int number(char *, char *, int, int);
61 static int64_t number64(char *, char *, int, int64_t);
62 static diskaddr_t getdiskbydev(char *);
63 static int  yes(void);
64 static int  notrand(char *);
65 static void usage();
66 static diskaddr_t get_device_size(int, char *);
67 static diskaddr_t brute_force_get_device_size(int);
68 static int validate_size(char *disk, diskaddr_t size);
69 static void exenv(void);
70 static struct fs *read_sb(char *);
71 /*PRINTFLIKE1*/
72 static void fatal(char *fmt, ...);
73 
/*
 * EPATH is the string exenv() places in front of the caller's PATH before
 * mkfs is run; CPATH is what exenv() uses in place of the caller's PATH
 * when the environment has none.
 */
#define	EPATH "PATH=/usr/sbin:/sbin:"
#define	CPATH "/sbin"					/* an EPATH element */
#define	MB (1024 * 1024)				/* bytes in a megabyte */
#define	GBSEC ((1024 * 1024 * 1024) / DEV_BSIZE)	/* sectors in a GB */
#define	MINFREESEC ((64 * 1024 * 1024) / DEV_BSIZE)	/* sectors in 64 MB */
#define	MINCPG (16)	/* traditional */
#define	MAXDEFDENSITY (8 * 1024)	/* arbitrary */
#define	MINDENSITY (2 * 1024)	/* traditional */
/* smallest bytes-per-inode value main() allows once Tflag is in effect */
#define	MIN_MTB_DENSITY (1024 * 1024)
/* Note: this test is also true for 0. */
#define	POWEROF2(num)	(((num) & ((num) - 1)) == 0)
/* 2**31 sectors; a terabyte when a sector is 512 bytes */
#define	SECTORS_PER_TERABYTE	(1LL << 31)
/*
 * The following constant specifies an upper limit for file system size
 * that is actually a lot bigger than we expect to support with UFS. (Since
 * it's specified in sectors, the file system size would be 2**44 * 512,
 * which is 2**53, which is 8192 Terabytes.)  However, it's useful
 * for checking the basic sanity of a size value that is input on the
 * command line.
 */
#define	FS_SIZE_UPPER_LIMIT	0x100000000000LL

/*
 * Flag values passed as the third argument of number() and number64().
 */
#define	NR_NONE		0
#define	NR_PERCENT	0x01

/*
 * The following two constants set the default block and fragment sizes.
 * Both constants must be a power of 2 and meet the following constraints:
 *	MINBSIZE <= DESBLKSIZE <= MAXBSIZE
 *	DEV_BSIZE <= DESFRAGSIZE <= DESBLKSIZE
 *	DESBLKSIZE / DESFRAGSIZE <= 8
 */
#define	DESBLKSIZE	8192
#define	DESFRAGSIZE	1024

/*
 * File system parameters.  main() fills these in from the command line,
 * getdiskbydev() supplies defaults taken from the device, and main() then
 * passes the final values to mkfs.  The *_set flags record that the user
 * gave the value explicitly, so main() can warn when the option is
 * ignored for the device's label type.
 */
static int	Nflag;		/* run mkfs without writing file system */
static int	Tflag;		/* set up file system for growth to over 1 TB */
static int	verbose;	/* show mkfs line before exec */
static int	fsize = 0;		/* fragment size */
static int	fsize_flag = 0;	/* fragment size was specified on cmd line */
static int	bsize;		/* block size */
static int	ntracks;	/* # tracks/cylinder */
static int	ntracks_set = 0; /* true if the user specified ntracks */
static int	optim = FS_OPTTIME;	/* optimization, t(ime) or s(pace) */
static int	nsectors;	/* # sectors/track */
static int	cpg;		/* cylinders/cylinder group */
static int	cpg_set = 0;	/* true if the user specified cpg */
static int	minfree = -1;	/* free space threshold; -1 = not given */
static int	rpm;		/* revolutions/minute of drive */
static int	rpm_set = 0;	/* true if the user specified rpm */
static int	nrpos = 8;	/* # of distinguished rotational positions */
				/* 8 is the historical default */
static int	nrpos_set = 0;	/* true if the user specified nrpos */
static int	density = 0;	/* number of bytes per inode */
static int	apc;		/* alternates per cylinder */
static int	apc_set = 0;	/* true if the user specified apc */
static int 	rot = -1;	/* rotational delay (msecs) */
static int	rot_set = 0;	/* true if the user specified rot */
static int 	maxcontig = -1;	/* maximum number of contig blocks */
static int	text_sb = 0;	/* no disk changes; just final sb text dump */
static int	binary_sb = 0;	/* no disk changes; just final sb binary dump */
static int	label_type;	/* set by get_device_size(); see types below */

#define	LABEL_TYPE_VTOC		1	/* read_vtoc() succeeded */
#define	LABEL_TYPE_EFI		2	/* efi_alloc_and_read() succeeded */
#define	LABEL_TYPE_OTHER	3	/* no label; size found by probing */

static char	device[MAXPATHLEN];	/* scratch path built from the operand */
static char	cmd[BUFSIZ];		/* command line handed to system() */

extern	char	*getfullrawname(); /* from libadm */

146 int
147 main(int argc, char *argv[])
148 {
149 	char *special, *name;
150 	struct stat64 st;
151 	int status;
152 	int option;
153 	struct fs *sbp;	/* Pointer to superblock (if present) */
154 	diskaddr_t actual_fssize;
155 	diskaddr_t max_possible_fssize;
156 	diskaddr_t req_fssize = 0;
157 	diskaddr_t fssize = 0;
158 	char	*req_fssize_str = NULL; /* requested size argument */
159 
160 	(void) setlocale(LC_ALL, "");
161 
162 #if !defined(TEXT_DOMAIN)
163 #define	TEXT_DOMAIN	"SYS_TEST"
164 #endif
165 	(void) textdomain(TEXT_DOMAIN);
166 
167 	opterr = 0;	/* We print our own errors, disable getopt's message */
168 	while ((option = getopt(argc, argv,
169 	    "vNBSs:C:d:t:o:a:b:f:c:m:n:r:i:T")) != EOF) {
170 		switch (option) {
171 		case 'S':
172 			text_sb++;
173 			break;
174 		case 'B':
175 			binary_sb++;
176 			break;
177 		case 'v':
178 			verbose++;
179 			break;
180 
181 		case 'N':
182 			Nflag++;
183 			break;
184 
185 		case 's':
186 			/*
187 			 * The maximum file system size is a lot smaller
188 			 * than FS_SIZE_UPPER_LIMIT, but until we find out
189 			 * the device size and block size, we don't know
190 			 * what it is.  So save the requested size in a
191 			 * string so that we can print it out later if we
192 			 * determine it's too big.
193 			 */
194 			req_fssize = number64("fssize", optarg, NR_NONE,
195 			    FS_SIZE_UPPER_LIMIT);
196 			if (req_fssize < 1024)
197 				fatal(gettext(
198 				    "%s: fssize must be at least 1024"),
199 				    optarg);
200 			req_fssize_str = strdup(optarg);
201 			if (req_fssize_str == NULL)
202 				fatal(gettext(
203 				    "Insufficient memory for string copy."));
204 			break;
205 
206 		case 'C':
207 			maxcontig = number("maxcontig", optarg, NR_NONE, -1);
208 			if (maxcontig < 0)
209 				fatal(gettext("%s: bad maxcontig"), optarg);
210 			break;
211 
212 		case 'd':
213 			rot = number("rotdelay", optarg, NR_NONE, 0);
214 			rot_set = 1;
215 			if (rot < 0 || rot > 1000)
216 				fatal(gettext(
217 				    "%s: bad rotational delay"), optarg);
218 			break;
219 
220 		case 't':
221 			ntracks = number("ntrack", optarg, NR_NONE, 16);
222 			ntracks_set = 1;
223 			if ((ntracks < 0) ||
224 			    (ntracks > INT_MAX))
225 				fatal(gettext("%s: bad total tracks"), optarg);
226 			break;
227 
228 		case 'o':
229 			if (strcmp(optarg, "space") == 0)
230 			    optim = FS_OPTSPACE;
231 			else if (strcmp(optarg, "time") == 0)
232 			    optim = FS_OPTTIME;
233 			else
234 			    fatal(gettext(
235 "%s: bad optimization preference (options are `space' or `time')"),
236 				optarg);
237 			break;
238 
239 		case 'a':
240 			apc = number("apc", optarg, NR_NONE, 0);
241 			apc_set = 1;
242 			if (apc < 0 || apc > 32768) /* see mkfs.c */
243 				fatal(gettext(
244 				    "%s: bad alternates per cyl"), optarg);
245 			break;
246 
247 		case 'b':
248 			bsize = number("bsize", optarg, NR_NONE, DESBLKSIZE);
249 			if (bsize < MINBSIZE || bsize > MAXBSIZE)
250 				fatal(gettext(
251 				    "%s: bad block size"), optarg);
252 			break;
253 
254 		case 'f':
255 			fsize = number("fragsize", optarg, NR_NONE,
256 				DESFRAGSIZE);
257 			fsize_flag++;
258 			/* xxx ought to test against bsize for upper limit */
259 			if (fsize < DEV_BSIZE)
260 				fatal(gettext("%s: bad frag size"), optarg);
261 			break;
262 
263 		case 'c':
264 			cpg = number("cpg", optarg, NR_NONE, 16);
265 			cpg_set = 1;
266 			if (cpg < 1)
267 				fatal(gettext("%s: bad cylinders/group"),
268 				    optarg);
269 			break;
270 
271 		case 'm':
272 			minfree = number("minfree", optarg, NR_PERCENT, 10);
273 			if (minfree < 0 || minfree > 99)
274 				fatal(gettext("%s: bad free space %%"), optarg);
275 			break;
276 
277 		case 'n':
278 			nrpos = number("nrpos", optarg, NR_NONE, 8);
279 			nrpos_set = 1;
280 			if (nrpos <= 0)
281 				fatal(gettext(
282 				    "%s: bad number of rotational positions"),
283 				    optarg);
284 			break;
285 
286 		case 'r':
287 			rpm = number("rpm", optarg, NR_NONE, 3600);
288 			rpm_set = 1;
289 			if (rpm < 0)
290 				fatal(gettext("%s: bad revs/minute"), optarg);
291 			break;
292 
293 		case 'i':
294 			/* xxx ought to test against fsize */
295 			density = number("nbpi", optarg, NR_NONE, 2048);
296 			if (density < DEV_BSIZE)
297 				fatal(gettext("%s: bad bytes per inode"),
298 				    optarg);
299 			break;
300 
301 		case 'T':
302 			Tflag++;
303 			break;
304 
305 		default:
306 			usage();
307 			fatal(gettext("-%c: unknown flag"), optopt);
308 		}
309 	}
310 
311 	/* At this point, there should only be one argument left:	*/
312 	/* The raw-special-device itself. If not, print usage message.	*/
313 	if ((argc - optind) != 1) {
314 		usage();
315 		exit(1);
316 	}
317 
318 	name = argv[optind];
319 
320 	special = getfullrawname(name);
321 	if (special == NULL) {
322 		(void) fprintf(stderr, gettext("newfs: malloc failed\n"));
323 		exit(1);
324 	}
325 
326 	if (*special == '\0') {
327 		if (strchr(name, '/') != NULL) {
328 			if (stat64(name, &st) < 0) {
329 				(void) fprintf(stderr,
330 				    gettext("newfs: %s: %s\n"),
331 				    name, strerror(errno));
332 				exit(2);
333 			}
334 			fatal(gettext("%s: not a raw disk device"), name);
335 		}
336 		(void) sprintf(device, "/dev/rdsk/%s", name);
337 		if ((special = getfullrawname(device)) == NULL) {
338 			(void) fprintf(stderr,
339 			    gettext("newfs: malloc failed\n"));
340 			exit(1);
341 		}
342 
343 		if (*special == '\0') {
344 			(void) sprintf(device, "/dev/%s", name);
345 			if ((special = getfullrawname(device)) == NULL) {
346 				(void) fprintf(stderr,
347 				    gettext("newfs: malloc failed\n"));
348 				exit(1);
349 			}
350 			if (*special == '\0')
351 				fatal(gettext(
352 				    "%s: not a raw disk device"), name);
353 		}
354 	}
355 
356 	/*
357 	 * getdiskbydev() determines the characteristics of the special
358 	 * device on which the file system will be built.  In the case
359 	 * of devices with SMI labels (that is, non-EFI labels), the
360 	 * following characteristics are set (if they were not already
361 	 * set on the command line, since the command line settings
362 	 * take precedence):
363 	 *
364 	 *	nsectors - sectors per track
365 	 *	ntracks - tracks per cylinder
366 	 *	rpm - disk revolutions per minute
367 	 *
368 	 *	apc is NOT set
369 	 *
370 	 * getdiskbydev() also sets the following quantities for all
371 	 * devices, if not already set:
372 	 *
373 	 *	bsize - file system block size
374 	 *	maxcontig
375 	 *	label_type (efi, vtoc, or other)
376 	 *
377 	 * getdiskbydev() returns the actual size of the device, in
378 	 * sectors.
379 	 */
380 
381 	actual_fssize = getdiskbydev(special);
382 
383 	if (req_fssize == 0) {
384 		fssize = actual_fssize;
385 	} else {
386 		/*
387 		 * If the user specified a size larger than what we've
388 		 * determined as the actual size of the device, see if the
389 		 * size specified by the user can be read.  If so, use it,
390 		 * since some devices and volume managers may not support
391 		 * the vtoc and EFI interfaces we use to determine device
392 		 * size.
393 		 */
394 		if (req_fssize > actual_fssize &&
395 		    validate_size(special, req_fssize)) {
396 			(void) fprintf(stderr, gettext(
397 "Warning: the requested size of this file system\n"
398 "(%lld sectors) is greater than the size of the\n"
399 "device reported by the driver (%lld sectors).\n"
400 "However, a read of the device at the requested size\n"
401 "does succeed, so the requested size will be used.\n"),
402 			    req_fssize, actual_fssize);
403 			fssize = req_fssize;
404 		} else {
405 			fssize = MIN(req_fssize, actual_fssize);
406 		}
407 	}
408 
409 	if (label_type == LABEL_TYPE_VTOC) {
410 		if (nsectors < 0)
411 			fatal(gettext("%s: no default #sectors/track"),
412 			    special);
413 		if (ntracks < 0)
414 			fatal(gettext("%s: no default #tracks"), special);
415 		if (rpm < 0)
416 			fatal(gettext(
417 			    "%s: no default revolutions/minute value"),
418 			    special);
419 		if (rpm < 60) {
420 			(void) fprintf(stderr,
421 			    gettext("Warning: setting rpm to 60\n"));
422 			rpm = 60;
423 		}
424 	}
425 	if (label_type == LABEL_TYPE_EFI || label_type == LABEL_TYPE_OTHER) {
426 		if (ntracks_set)
427 			(void) fprintf(stderr, gettext(
428 "Warning: ntracks is obsolete for this device and will be ignored.\n"));
429 		if (cpg_set)
430 			(void) fprintf(stderr, gettext(
431 "Warning: cylinders/group is obsolete for this device and will be ignored.\n"));
432 		if (rpm_set)
433 			(void) fprintf(stderr, gettext(
434 "Warning: rpm is obsolete for this device and will be ignored.\n"));
435 		if (rot_set)
436 			(void) fprintf(stderr, gettext(
437 "Warning: rotational delay is obsolete for this device and"
438 " will be ignored.\n"));
439 		if (nrpos_set)
440 			(void) fprintf(stderr, gettext(
441 "Warning: number of rotational positions is obsolete for this device and\n"
442 "will be ignored.\n"));
443 		if (apc_set)
444 			(void) fprintf(stderr, gettext(
445 "Warning: number of alternate sectors per cylinder is obsolete for this\n"
446 "device and will be ignored.\n"));
447 
448 		/*
449 		 * We need these for the call to mkfs, even though they are
450 		 * meaningless.
451 		 */
452 		rpm = 60;
453 		nrpos = 1;
454 		apc = 0;
455 		rot = -1;
456 
457 		/*
458 		 * These values are set to produce a file system with
459 		 * a cylinder group size of 48MB.   For disks with
460 		 * non-EFI labels, most geometries result in cylinder
461 		 * groups of around 40 - 50 MB, so we arbitrarily choose
462 		 * 48MB for disks with EFI labels.  mkfs will reduce
463 		 * cylinders per group even further if necessary.
464 		 */
465 
466 		cpg = 16;
467 		nsectors = 128;
468 		ntracks = 48;
469 
470 		/*
471 		 * mkfs produces peculiar results for file systems
472 		 * that are smaller than one cylinder so don't allow
473 		 * them to be created (this check is only made for
474 		 * disks with EFI labels.  Eventually, it should probably
475 		 * be enforced for all disks.)
476 		 */
477 
478 		if (fssize < nsectors * ntracks) {
479 			fatal(gettext(
480 			    "file system size must be at least %d sectors"),
481 			    nsectors * ntracks);
482 		}
483 	}
484 
485 	if (fssize > INT_MAX)
486 		Tflag = 1;
487 
488 	/*
489 	 * If the user requested that the file system be set up for
490 	 * eventual growth to over a terabyte, or if it's already greater
491 	 * than a terabyte, set the inode density (nbpi) to MIN_MTB_DENSITY
492 	 * (unless the user has specified a larger nbpi), set the frag size
493 	 * equal to the block size, and set the cylinders-per-group value
494 	 * passed to mkfs to -1, which tells mkfs to make cylinder groups
495 	 * as large as possible.
496 	 */
497 	if (Tflag) {
498 		if (density < MIN_MTB_DENSITY)
499 			density = MIN_MTB_DENSITY;
500 		fsize = bsize;
501 		cpg = -1; 	/* says make cyl groups as big as possible */
502 	} else {
503 		if (fsize == 0)
504 			fsize = DESFRAGSIZE;
505 	}
506 
507 	if (!POWEROF2(fsize)) {
508 		(void) fprintf(stderr, gettext(
509 		    "newfs: fragment size must a power of 2, not %d\n"), fsize);
510 		fsize = bsize/8;
511 		(void) fprintf(stderr, gettext(
512 		    "newfs: fragsize reset to %ld\n"), fsize);
513 	}
514 
515 	/*
516 	 * The file system is limited in size by the fragment size.
517 	 * The number of fragments in the file system must fit into
518 	 * a signed 32-bit quantity, so the number of sectors in the
519 	 * file system is INT_MAX * the number of sectors in a frag.
520 	 */
521 
522 	max_possible_fssize = ((uint64_t)fsize)/DEV_BSIZE * INT_MAX;
523 	if (fssize > max_possible_fssize)
524 		fssize = max_possible_fssize;
525 
526 	/*
527 	 * Now fssize is the final size of the file system (in sectors).
528 	 * If it's less than what the user requested, print a message.
529 	 */
530 	if (fssize < req_fssize) {
531 		(void) fprintf(stderr, gettext(
532 		    "newfs: requested size of %s disk blocks is too large.\n"),
533 		    req_fssize_str);
534 		(void) fprintf(stderr, gettext(
535 		    "newfs: Resetting size to %lld\n"), fssize);
536 	}
537 
538 	/*
539 	 * fssize now equals the size (in sectors) of the file system
540 	 * that will be created.
541 	 */
542 
543 	/* XXX - following defaults are both here and in mkfs */
544 	if (density <= 0) {
545 		if (fssize < GBSEC)
546 			density = MINDENSITY;
547 		else
548 			density = (int)((((longlong_t)fssize + (GBSEC - 1)) /
549 						GBSEC) * MINDENSITY);
550 		if (density <= 0)
551 			density = MINDENSITY;
552 		if (density > MAXDEFDENSITY)
553 			density = MAXDEFDENSITY;
554 	}
555 	if (cpg == 0) {
556 		/*
557 		 * maxcpg calculation adapted from mkfs
558 		 * In the case of disks with EFI labels, cpg has
559 		 * already been set, so we won't enter this code.
560 		 */
561 		long maxcpg, maxipg;
562 
563 		maxipg = roundup(bsize * NBBY / 3,
564 		    bsize / sizeof (struct inode));
565 		maxcpg = (bsize - sizeof (struct cg) - howmany(maxipg, NBBY)) /
566 		    (sizeof (long) + nrpos * sizeof (short) +
567 			nsectors / (MAXFRAG * NBBY));
568 		cpg = (fssize / GBSEC) * 32;
569 		if (cpg > maxcpg)
570 			cpg = maxcpg;
571 		if (cpg <= 0)
572 			cpg = MINCPG;
573 	}
574 	if (minfree < 0) {
575 		minfree = ((float)MINFREESEC / fssize) * 100;
576 		if (minfree > 10)
577 			minfree = 10;
578 		if (minfree <= 0)
579 			minfree = 1;
580 	}
581 #ifdef i386	/* Bug 1170182 */
582 	if (ntracks > 32 && (ntracks % 16) != 0) {
583 		ntracks -= (ntracks % 16);
584 	}
585 #endif
586 	/*
587 	 * Confirmation
588 	 */
589 	if (isatty(fileno(stdin)) && !Nflag) {
590 		/*
591 		 * If we can read a valid superblock, report the mount
592 		 * point on which this filesystem was last mounted.
593 		 */
594 		if (((sbp = read_sb(special)) != 0) &&
595 		    (*sbp->fs_fsmnt != '\0')) {
596 			(void) printf(gettext(
597 			    "newfs: %s last mounted as %s\n"),
598 			    special, sbp->fs_fsmnt);
599 		}
600 		(void) printf(gettext(
601 		    "newfs: construct a new file system %s: (y/n)? "),
602 		    special);
603 		(void) fflush(stdout);
604 		if (!yes())
605 			exit(0);
606 	}
607 	/*
608 	 * If alternates-per-cylinder is ever implemented:
609 	 * need to get apc from dp->d_apc if no -a switch???
610 	 */
611 	(void) sprintf(cmd,
612 	"mkfs -F ufs %s%s%s%s %lld %d %d %d %d %d %d %d %d %s %d %d %d %d %s",
613 	    Nflag ? "-o N " : "", binary_sb ? "-o calcbinsb " : "",
614 	    text_sb ? "-o calcsb " : "", special,
615 	    fssize, nsectors, ntracks, bsize, fsize, cpg, minfree, rpm/60,
616 	    density, optim == FS_OPTSPACE ? "s" : "t", apc, rot, nrpos,
617 	    maxcontig, Tflag ? "y" : "n");
618 	if (verbose) {
619 		(void) printf("%s\n", cmd);
620 		(void) fflush(stdout);
621 	}
622 	exenv();
623 	if (status = system(cmd))
624 		exit(status >> 8);
625 	if (Nflag)
626 		exit(0);
627 	(void) sprintf(cmd, "/usr/sbin/fsirand %s", special);
628 	if (notrand(special) && (status = system(cmd)) != 0)
629 		(void) fprintf(stderr,
630 		    gettext("%s: failed, status = %d\n"),
631 		    cmd, status);
632 	return (0);
633 }
634 
635 static void
636 exenv(void)
637 {
638 	char *epath;				/* executable file path */
639 	char *cpath;				/* current path */
640 
641 	if ((cpath = getenv("PATH")) == NULL) {
642 		(void) fprintf(stderr, gettext("newfs: no PATH in env\n"));
643 		/*
644 		 * Background: the Bourne shell interpolates "." into
645 		 * the path where said path starts with a colon, ends
646 		 * with a colon, or has two adjacent colons.  Thus,
647 		 * the path ":/sbin::/usr/sbin:" is equivalent to
648 		 * ".:/sbin:.:/usr/sbin:.".  Now, we have no cpath,
649 		 * and epath ends in a colon (to make for easy
650 		 * catenation in the normal case).  By the above, if
651 		 * we use "", then "." becomes part of path.  That's
652 		 * bad, so use CPATH (which is just a duplicate of some
653 		 * element in EPATH).  No point in opening ourselves
654 		 * up to a Trojan horse attack when we don't have to....
655 		 */
656 		cpath = CPATH;
657 	}
658 	if ((epath = malloc(strlen(EPATH) + strlen(cpath) + 1)) == NULL) {
659 		(void) fprintf(stderr, gettext("newfs: malloc failed\n"));
660 		exit(1);
661 	}
662 	(void) strcpy(epath, EPATH);
663 	(void) strcat(epath, cpath);
664 	if (putenv(epath) < 0) {
665 		(void) fprintf(stderr, gettext("newfs: putenv failed\n"));
666 		exit(1);
667 	}
668 }
669 
/*
 * yes - read one answer line from stdin.
 *
 * Consumes input up to and including the first newline (or NUL, or end
 * of file) and returns non-zero only when the first character read was
 * a lower-case 'y'.
 */
static int
yes(void)
{
	int first;
	int c;

	first = getchar();
	/* discard whatever else was typed on the line */
	for (c = first; c != '\n' && c != '\0' && c != EOF; c = getchar())
		;
	return (first == 'y');
}

680 
/*
 * fatal - print "newfs: ", the formatted message and a newline on stderr,
 * then exit with status 10.  This function does not return.
 *
 * The format string is used as given, so a caller that wants a translated
 * message must pass fmt through gettext(3) itself.
 */
static void
fatal(char *fmt, ...)
{
	va_list args;

	(void) fputs("newfs: ", stderr);

	va_start(args, fmt);
	(void) vfprintf(stderr, fmt, args);
	va_end(args);

	(void) fputc('\n', stderr);
	exit(10);
}

697 
698 static diskaddr_t
699 getdiskbydev(char *disk)
700 {
701 	struct dk_geom g;
702 	struct dk_cinfo ci;
703 	diskaddr_t actual_size;
704 	int fd;
705 
706 	if ((fd = open64(disk, 0)) < 0) {
707 		perror(disk);
708 		exit(1);
709 	}
710 
711 	/*
712 	 * get_device_size() determines the actual size of the
713 	 * device, and also the disk's attributes, such as geometry.
714 	 */
715 	actual_size = get_device_size(fd, disk);
716 
717 	if (label_type == LABEL_TYPE_VTOC) {
718 		if (ioctl(fd, DKIOCGGEOM, &g))
719 			fatal(gettext(
720 			    "%s: Unable to read Disk geometry"), disk);
721 		if (nsectors == 0)
722 			nsectors = g.dkg_nsect;
723 		if (ntracks == 0)
724 			ntracks = g.dkg_nhead;
725 		if (rpm == 0)
726 			rpm = ((int)g.dkg_rpm <= 0) ? 3600: g.dkg_rpm;
727 	}
728 
729 	if (bsize == 0)
730 		bsize = DESBLKSIZE;
731 	/*
732 	 * Adjust maxcontig by the device's maxtransfer. If maxtransfer
733 	 * information is not available, default to the min of a MB and
734 	 * maxphys.
735 	 */
736 	if (maxcontig == -1 && ioctl(fd, DKIOCINFO, &ci) == 0) {
737 		maxcontig = ci.dki_maxtransfer * DEV_BSIZE;
738 		if (maxcontig < 0) {
739 			int	error, gotit, maxphys;
740 			gotit = fsgetmaxphys(&maxphys, &error);
741 
742 			/*
743 			 * If we cannot get the maxphys value, default
744 			 * to ufs_maxmaxphys (MB).
745 			 */
746 			if (gotit) {
747 				maxcontig = MIN(maxphys, MB);
748 			} else {
749 				(void) fprintf(stderr, gettext(
750 "Warning: Could not get system value for maxphys. The value for maxcontig\n"
751 "will default to 1MB.\n"));
752 			maxcontig = MB;
753 			}
754 		}
755 		maxcontig /= bsize;
756 	}
757 	(void) close(fd);
758 	return (actual_size);
759 }
760 
761 /*
762  * Figure out how big the partition we're dealing with is.
763  */
764 static diskaddr_t
765 get_device_size(int fd, char *name)
766 {
767 	struct vtoc vtoc;
768 	dk_gpt_t *efi_vtoc;
769 	diskaddr_t	slicesize;
770 
771 	int index = read_vtoc(fd, &vtoc);
772 
773 	if (index >= 0) {
774 		label_type = LABEL_TYPE_VTOC;
775 	} else {
776 		if (index == VT_ENOTSUP || index == VT_ERROR) {
777 			/* it might be an EFI label */
778 			index = efi_alloc_and_read(fd, &efi_vtoc);
779 			if (index >= 0)
780 				label_type = LABEL_TYPE_EFI;
781 		}
782 	}
783 
784 	if (index < 0) {
785 		/*
786 		 * Since both attempts to read the label failed, we're
787 		 * going to fall back to a brute force approach to
788 		 * determining the device's size:  see how far out we can
789 		 * perform reads on the device.
790 		 */
791 
792 		slicesize = brute_force_get_device_size(fd);
793 		if (slicesize == 0) {
794 			switch (index) {
795 			case VT_ERROR:
796 				(void) fprintf(stderr, gettext(
797 				    "newfs: %s: %s\n"), name, strerror(errno));
798 				exit(10);
799 				/*NOTREACHED*/
800 			case VT_EIO:
801 				fatal(gettext(
802 				    "%s: I/O error accessing VTOC"), name);
803 				/*NOTREACHED*/
804 			case VT_EINVAL:
805 				fatal(gettext(
806 				    "%s: Invalid field in VTOC"), name);
807 				/*NOTREACHED*/
808 			default:
809 				fatal(gettext(
810 				    "%s: unknown error accessing VTOC"),
811 				    name);
812 				/*NOTREACHED*/
813 			}
814 		} else {
815 			label_type = LABEL_TYPE_OTHER;
816 		}
817 	}
818 
819 	if (label_type == LABEL_TYPE_EFI) {
820 		slicesize = efi_vtoc->efi_parts[index].p_size;
821 		efi_free(efi_vtoc);
822 	} else if (label_type == LABEL_TYPE_VTOC) {
823 		/*
824 		 * In the vtoc struct, p_size is a 32-bit signed quantity.
825 		 * In the dk_gpt struct (efi's version of the vtoc), p_size
826 		 * is an unsigned 64-bit quantity.  By casting the vtoc's
827 		 * psize to an unsigned 32-bit quantity, it will be copied
828 		 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without
829 		 * sign extension.
830 		 */
831 
832 		slicesize = (uint32_t)vtoc.v_part[index].p_size;
833 	}
834 
835 	return (slicesize);
836 }
837 
838 /*
839  * brute_force_get_device_size
840  *
841  * Determine the size of the device by seeing how far we can
842  * read.  Doing an llseek( , , SEEK_END) would probably work
843  * in most cases, but we've seen at least one third-party driver
844  * which doesn't correctly support the SEEK_END option when the
845  * the device is greater than a terabyte.
846  */
847 
848 static diskaddr_t
849 brute_force_get_device_size(int fd)
850 {
851 	diskaddr_t	min_fail = 0;
852 	diskaddr_t	max_succeed = 0;
853 	diskaddr_t	cur_db_off;
854 	char 		buf[DEV_BSIZE];
855 
856 	/*
857 	 * First, see if we can read the device at all, just to
858 	 * eliminate errors that have nothing to do with the
859 	 * device's size.
860 	 */
861 
862 	if (((llseek(fd, (offset_t)0, SEEK_SET)) == -1) ||
863 	    ((read(fd, buf, DEV_BSIZE)) == -1))
864 		return (0);  /* can't determine size */
865 
866 	/*
867 	 * Now, go sequentially through the multiples of 4TB
868 	 * to find the first read that fails (this isn't strictly
869 	 * the most efficient way to find the actual size if the
870 	 * size really could be anything between 0 and 2**64 bytes.
871 	 * We expect the sizes to be less than 16 TB for some time,
872 	 * so why do a bunch of reads that are larger than that?
873 	 * However, this algorithm *will* work for sizes of greater
874 	 * than 16 TB.  We're just not optimizing for those sizes.)
875 	 */
876 
877 	for (cur_db_off = SECTORS_PER_TERABYTE * 4;
878 	    min_fail == 0 && cur_db_off < FS_SIZE_UPPER_LIMIT;
879 	    cur_db_off += 4 * SECTORS_PER_TERABYTE) {
880 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
881 		    SEEK_SET)) == -1) ||
882 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
883 			min_fail = cur_db_off;
884 		else
885 			max_succeed = cur_db_off;
886 	}
887 
888 	if (min_fail == 0)
889 		return (0);
890 
891 	/*
892 	 * We now know that the size of the device is less than
893 	 * min_fail and greater than or equal to max_succeed.  Now
894 	 * keep splitting the difference until the actual size in
895 	 * sectors in known.  We also know that the difference
896 	 * between max_succeed and min_fail at this time is
897 	 * 4 * SECTORS_PER_TERABYTE, which is a power of two, which
898 	 * simplifies the math below.
899 	 */
900 
901 	while (min_fail - max_succeed > 1) {
902 		cur_db_off = max_succeed + (min_fail - max_succeed)/2;
903 		if (((llseek(fd, (offset_t)(cur_db_off * DEV_BSIZE),
904 		    SEEK_SET)) == -1) ||
905 		    ((read(fd, buf, DEV_BSIZE)) != DEV_BSIZE))
906 			min_fail = cur_db_off;
907 		else
908 			max_succeed = cur_db_off;
909 	}
910 
911 	/* the size is the last successfully read sector offset plus one */
912 	return (max_succeed + 1);
913 }
914 
915 /*
916  * validate_size
917  *
918  * Return 1 if the device appears to be at least "size" sectors long.
919  * Return 0 if it's shorter or we can't read it.
920  */
921 
922 static int
923 validate_size(char *disk, diskaddr_t size)
924 {
925 	char 		buf[DEV_BSIZE];
926 	int fd, rc;
927 
928 	if ((fd = open64(disk, O_RDONLY)) < 0) {
929 		perror(disk);
930 		exit(1);
931 	}
932 
933 	if ((llseek(fd, (offset_t)((size - 1) * DEV_BSIZE), SEEK_SET) == -1) ||
934 	    (read(fd, buf, DEV_BSIZE)) != DEV_BSIZE)
935 		rc = 0;
936 	else
937 		rc = 1;
938 	(void) close(fd);
939 	return (rc);
940 }
941 
942 /*
943  * read_sb(char * rawdev) - Attempt to read the superblock from a raw device
944  *
945  * Returns:
946  *	0 :
947  *		Could not read a valid superblock for a variety of reasons.
948  *		Since 'newfs' handles any fatal conditions, we're not going
949  *		to make any guesses as to why this is failing or what should
950  *		be done about it.
951  *
952  *	struct fs *:
953  *		A pointer to (what we think is) a valid superblock. The
954  *		space for the superblock is static (inside the function)
955  *		since we will only be reading the values from it.
956  */
957 
958 struct fs *
959 read_sb(char *fsdev)
960 {
961 	static struct fs	sblock;
962 	struct stat64		statb;
963 	int			dskfd;
964 	char			*bufp = NULL;
965 	int			bufsz = 0;
966 
967 	if (stat64(fsdev, &statb) < 0)
968 		return (0);
969 
970 	if ((dskfd = open64(fsdev, O_RDONLY)) < 0)
971 		return (0);
972 
973 	/*
974 	 * We need a buffer whose size is a multiple of DEV_BSIZE in order
975 	 * to read from a raw device (which we were probably passed).
976 	 */
977 	bufsz = ((sizeof (sblock) / DEV_BSIZE) + 1) * DEV_BSIZE;
978 	if ((bufp = malloc(bufsz)) == NULL) {
979 		(void) close(dskfd);
980 		return (0);
981 	}
982 
983 	if (llseek(dskfd, (offset_t)SBOFF, SEEK_SET) < 0 ||
984 	    read(dskfd, bufp, bufsz) < 0) {
985 		(void) close(dskfd);
986 		free(bufp);
987 		return (0);
988 	}
989 	(void) close(dskfd);	/* Done with the file */
990 
991 	(void) memcpy(&sblock, bufp, sizeof (sblock));
992 	free(bufp);	/* Don't need this anymore */
993 
994 	if (((sblock.fs_magic != FS_MAGIC) &&
995 	    (sblock.fs_magic != MTB_UFS_MAGIC)) ||
996 	    sblock.fs_ncg < 1 || sblock.fs_cpg < 1)
997 		return (0);
998 
999 	if (sblock.fs_ncg * sblock.fs_cpg < sblock.fs_ncyl ||
1000 	    (sblock.fs_ncg - 1) * sblock.fs_cpg >= sblock.fs_ncyl)
1001 		return (0);
1002 
1003 	if (sblock.fs_sbsize < 0 || sblock.fs_sbsize > SBSIZE)
1004 		return (0);
1005 
1006 	return (&sblock);
1007 }
1008 
1009 /*
1010  * Read the UFS file system on the raw device SPECIAL.  If it does not
1011  * appear to be a UFS file system, return non-zero, indicating that
1012  * fsirand should be called (and it will spit out an error message).
1013  * If it is a UFS file system, take a look at the inodes in the first
1014  * cylinder group.  If they appear to be randomized (non-zero), return
1015  * zero, which will cause fsirand to not be called.  If the inode generation
1016  * counts are all zero, then we must call fsirand, so return non-zero.
1017  */
1018 
1019 #define	RANDOMIZED	0
1020 #define	NOT_RANDOMIZED	1
1021 
1022 static int
1023 notrand(char *special)
1024 {
1025 	long fsbuf[SBSIZE / sizeof (long)];
1026 	struct dinode dibuf[MAXBSIZE/sizeof (struct dinode)];
1027 	struct fs *fs;
1028 	struct dinode *dip;
1029 	offset_t seekaddr;
1030 	int bno, inum;
1031 	int fd;
1032 
1033 	fs = (struct fs *)fsbuf;
1034 	if ((fd = open64(special, 0)) == -1)
1035 		return (NOT_RANDOMIZED);
1036 	if (llseek(fd, (offset_t)SBLOCK * DEV_BSIZE, 0) == -1 ||
1037 	    read(fd, (char *)fs, SBSIZE) != SBSIZE ||
1038 	    ((fs->fs_magic != FS_MAGIC) && (fs->fs_magic != MTB_UFS_MAGIC))) {
1039 		(void) close(fd);
1040 		return (NOT_RANDOMIZED);
1041 	}
1042 
1043 	/* looks like a UFS file system; read the first cylinder group */
1044 	bsize = INOPB(fs) * sizeof (struct dinode);
1045 	inum = 0;
1046 	while (inum < fs->fs_ipg) {
1047 		bno = itod(fs, inum);
1048 		seekaddr = (offset_t)fsbtodb(fs, bno) * DEV_BSIZE;
1049 		if (llseek(fd, seekaddr, 0) == -1 ||
1050 		    read(fd, (char *)dibuf, bsize) != bsize) {
1051 			(void) close(fd);
1052 			return (NOT_RANDOMIZED);
1053 		}
1054 		for (dip = dibuf; dip < &dibuf[INOPB(fs)]; dip++) {
1055 			if (dip->di_gen != 0) {
1056 				(void) close(fd);
1057 				return (RANDOMIZED);
1058 			}
1059 			inum++;
1060 		}
1061 	}
1062 	(void) close(fd);
1063 	return (NOT_RANDOMIZED);
1064 }
1065 
/*
 * Print the command synopsis followed by one line per supported mkfs
 * option on stderr.  Every line is looked up through gettext() on its
 * own, so each literal stays a separate message.
 */
static void
usage(void)
{
	FILE *err = stderr;

	/* synopsis */
	(void) fprintf(err, gettext(
	    "usage: newfs [ -v ] [ mkfs-options ] raw-special-device\n"));
	(void) fprintf(err, gettext("where mkfs-options are:\n"));

	/* one line per option */
	(void) fprintf(err, gettext(
	    "\t-N do not create file system, just print out parameters\n"));
	(void) fprintf(err, gettext(
"\t-T configure file system for eventual growth to over a terabyte\n"));
	(void) fprintf(err, gettext("\t-s file system size (sectors)\n"));
	(void) fprintf(err, gettext("\t-b block size\n"));
	(void) fprintf(err, gettext("\t-f frag size\n"));
	(void) fprintf(err, gettext("\t-t tracks/cylinder\n"));
	(void) fprintf(err, gettext("\t-c cylinders/group\n"));
	(void) fprintf(err, gettext("\t-m minimum free space %%\n"));
	(void) fprintf(err, gettext(
	    "\t-o optimization preference (`space' or `time')\n"));
	(void) fprintf(err, gettext("\t-r revolutions/minute\n"));
	(void) fprintf(err, gettext("\t-i number of bytes per inode\n"));
	(void) fprintf(err, gettext(
	    "\t-a number of alternates per cylinder\n"));
	(void) fprintf(err, gettext("\t-C maxcontig\n"));
	(void) fprintf(err, gettext("\t-d rotational delay\n"));
	(void) fprintf(err, gettext(
	    "\t-n number of rotational positions\n"));

	/* superblock dump options */
	(void) fprintf(err, gettext(
"\t-S print a textual version of the calculated superblock to stdout\n"));
	(void) fprintf(err, gettext(
"\t-B dump a binary version of the calculated superblock to stdout\n"));
}
1097 
1098 /*
1099  * Error-detecting version of atoi(3).  Adapted from mkfs' number().
1100  */
1101 static unsigned int
1102 number(char *param, char *value, int flags, int def_value)
1103 {
1104 	char *cs;
1105 	int n;
1106 	int cut = INT_MAX / 10;    /* limit to avoid overflow */
1107 	int minus = 0;
1108 
1109 	cs = value;
1110 	if (*cs == '-') {
1111 		minus = 1;
1112 		cs += 1;
1113 	}
1114 	if ((*cs < '0') || (*cs > '9')) {
1115 		goto bail_out;
1116 	}
1117 	n = 0;
1118 	while ((*cs >= '0') && (*cs <= '9') && (n <= cut)) {
1119 		n = n*10 + *cs++ - '0';
1120 	}
1121 	if (minus)
1122 	    n = -n;
1123 	for (;;) {
1124 		switch (*cs++) {
1125 		case '\0':
1126 			return (n);
1127 
1128 		case '0': case '1': case '2': case '3': case '4':
1129 		case '5': case '6': case '7': case '8': case '9':
1130 			(void) fprintf(stderr, gettext(
1131 			    "newfs: value for %s overflowed, using %d\n"),
1132 			    param, def_value);
1133 			return (def_value);
1134 
1135 		case '%':
1136 			if (flags & NR_PERCENT)
1137 				break;
1138 			/* FALLTHROUGH */
1139 
1140 		default:
1141 bail_out:
1142 			fatal(gettext("bad numeric arg for %s: \"%s\""),
1143 			    param, value);
1144 
1145 		}
1146 	}
1147 	/* NOTREACHED */
1148 }
1149 
1150 /*
1151  * Error-detecting version of atoi(3).  Adapted from mkfs' number().
1152  */
1153 static int64_t
1154 number64(char *param, char *value, int flags, int64_t def_value)
1155 {
1156 	char *cs;
1157 	int64_t n;
1158 	int64_t cut = FS_SIZE_UPPER_LIMIT/ 10;    /* limit to avoid overflow */
1159 	int minus = 0;
1160 
1161 	cs = value;
1162 	if (*cs == '-') {
1163 		minus = 1;
1164 		cs += 1;
1165 	}
1166 	if ((*cs < '0') || (*cs > '9')) {
1167 		goto bail_out;
1168 	}
1169 	n = 0;
1170 	while ((*cs >= '0') && (*cs <= '9') && (n <= cut)) {
1171 		n = n*10 + *cs++ - '0';
1172 	}
1173 	if (minus)
1174 	    n = -n;
1175 	for (;;) {
1176 		switch (*cs++) {
1177 		case '\0':
1178 			return (n);
1179 
1180 		case '0': case '1': case '2': case '3': case '4':
1181 		case '5': case '6': case '7': case '8': case '9':
1182 			(void) fprintf(stderr, gettext(
1183 			    "newfs: value for %s overflowed, using %d\n"),
1184 			    param, def_value);
1185 			return (def_value);
1186 
1187 		case '%':
1188 			if (flags & NR_PERCENT)
1189 				break;
1190 			/* FALLTHROUGH */
1191 
1192 		default:
1193 bail_out:
1194 			fatal(gettext("bad numeric arg for %s: \"%s\""),
1195 			    param, value);
1196 
1197 		}
1198 	}
1199 	/* NOTREACHED */
1200 }
1201