1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 27 /* All Rights Reserved */ 28 29 /* 30 * University Copyright- Copyright (c) 1982, 1986, 1988 31 * The Regents of the University of California 32 * All Rights Reserved 33 * 34 * University Acknowledgment- Portions of this document are derived from 35 * software developed by the University of California, Berkeley, and its 36 * contributors. 37 */ 38 39 #pragma ident "%Z%%M% %I% %E% SMI" 40 41 42 /* 43 * The maximum supported file system size (in sectors) is the 44 * number of frags that can be represented in an int32_t field 45 * (INT_MAX) times the maximum number of sectors per frag. Since 46 * the maximum frag size is MAXBSIZE, the maximum number of sectors 47 * per frag is MAXBSIZE/DEV_BSIZE. 
48 */ 49 #define FS_MAX (((diskaddr_t)INT_MAX) * (MAXBSIZE/DEV_BSIZE)) 50 51 /* 52 * make file system for cylinder-group style file systems 53 * 54 * usage: 55 * 56 * mkfs [-F FSType] [-V] [-G [-P]] [-M dirname] [-m] [options] 57 * [-o specific_options] special size 58 * [nsect ntrack bsize fsize cpg minfree rps nbpi opt apc rotdelay 59 * 2 3 4 5 6 7 8 9 10 11 12 60 * nrpos maxcontig mtb] 61 * 13 14 15 62 * 63 * where specific_options are: 64 * N - no create 65 * nsect - The number of sectors per track 66 * ntrack - The number of tracks per cylinder 67 * bsize - block size 68 * fragsize - fragment size 69 * cgsize - The number of disk cylinders per cylinder group. 70 * free - minimum free space 71 * rps - rotational speed (rev/sec). 72 * nbpi - number of data bytes per allocated inode 73 * opt - optimization (space, time) 74 * apc - number of alternates 75 * gap - gap size 76 * nrpos - number of rotational positions 77 * maxcontig - maximum number of logical blocks that will be 78 * allocated contiguously before inserting rotational delay 79 * mtb - if "y", set up file system for eventual growth to over 80 * a terabyte 81 * -P Do not grow the file system, but print on stdout the maximal 82 * size in sectors to which the file system can be increased. The calculated 83 * size is limited by the value provided by the operand size. 84 * 85 * Note that -P is a project-private interface and together with -G intended 86 * to be used only by the growfs script. It is therefore purposely not 87 * documented in the man page. 88 * The -P option is covered by PSARC case 2003/422. 89 */ 90 91 /* 92 * The following constants set the defaults used for the number 93 * of sectors/track (fs_nsect), and number of tracks/cyl (fs_ntrak). 94 * 95 * NSECT NTRAK 96 * 72MB CDC 18 9 97 * 30MB CDC 18 5 98 * 720KB Diskette 9 2 99 * 100 * However the defaults will be different for disks larger than CHSLIMIT.
101 */ 102 103 #define DFLNSECT 32 104 #define DFLNTRAK 16 105 106 /* 107 * The following default sectors and tracks values are used for 108 * non-efi disks that are larger than the CHS addressing limit. The 109 * existing default cpg of 16 (DESCPG) holds good for larger disks too. 110 */ 111 #define DEF_SECTORS_EFI 128 112 #define DEF_TRACKS_EFI 48 113 114 /* 115 * The maximum number of cylinders in a group depends upon how much 116 * information can be stored on a single cylinder. The default is to 117 * use 16 cylinders per group. This is effectively tradition - it was 118 * the largest value acceptable under SunOs 4.1 119 */ 120 #define DESCPG 16 /* desired fs_cpg */ 121 122 /* 123 * The following two constants set the default block and fragment sizes. 124 * Both constants must be a power of 2 and meet the following constraints: 125 * MINBSIZE <= DESBLKSIZE <= MAXBSIZE 126 * DEV_BSIZE <= DESFRAGSIZE <= DESBLKSIZE 127 * DESBLKSIZE / DESFRAGSIZE <= 8 128 */ 129 #define DESBLKSIZE 8192 130 #define DESFRAGSIZE 1024 131 132 /* 133 * MINFREE gives the minimum acceptable percentage of file system 134 * blocks which may be free. If the freelist drops below this level 135 * only the superuser may continue to allocate blocks. This may 136 * be set to 0 if no reserve of free blocks is deemed necessary, 137 * however throughput drops by fifty percent if the file system 138 * is run at between 90% and 100% full; thus the default value of 139 * fs_minfree is 10%. With 10% free space, fragmentation is not a 140 * problem, so we choose to optimize for time. 141 */ 142 #define MINFREE 10 143 #define DEFAULTOPT FS_OPTTIME 144 145 /* 146 * ROTDELAY gives the minimum number of milliseconds to initiate 147 * another disk transfer on the same cylinder. It is no longer used 148 * and will always default to 0. 149 */ 150 #define ROTDELAY 0 151 152 /* 153 * MAXBLKPG determines the maximum number of data blocks which are 154 * placed in a single cylinder group. 
The default is one indirect 155 * block worth of data blocks. 156 */ 157 #define MAXBLKPG(bsize) ((bsize) / sizeof (daddr32_t)) 158 159 /* 160 * Each file system has a number of inodes statically allocated. 161 * We allocate one inode slot per NBPI bytes, expecting this 162 * to be far more than we will ever need. 163 */ 164 #define NBPI 2048 /* Number Bytes Per Inode */ 165 #define MTB_NBPI (MB) /* Number Bytes Per Inode for multi-terabyte */ 166 167 /* 168 * Disks are assumed to rotate at 60HZ, unless otherwise specified. 169 */ 170 #define DEFHZ 60 171 172 /* 173 * Cylinder group related limits. 174 * 175 * For each cylinder we keep track of the availability of blocks at different 176 * rotational positions, so that we can lay out the data to be picked 177 * up with minimum rotational latency. NRPOS is the number of rotational 178 * positions which we distinguish. With NRPOS 8 the resolution of our 179 * summary information is 2ms for a typical 3600 rpm drive. 180 */ 181 #define NRPOS 8 /* number distinct rotational positions */ 182 183 #ifdef DEBUG 184 #define dprintf(x) printf x 185 #else 186 #define dprintf(x) 187 #endif 188 189 /* 190 * For the -N option, when calculating the backup superblocks, do not print 191 * them if we are not really sure. We may have to try an alternate method of 192 * arriving at the superblocks. So defer printing till a handful of superblocks 193 * look good. 194 */ 195 #define tprintf(x) if (Nflag && retry) \ 196 (void) strncat(tmpbuf, x, strlen(x)); \ 197 else \ 198 (void) fprintf(stderr, x); 199 200 #define ALTSB 32 /* Location of first backup superblock */ 201 202 /* 203 * range_check "user_supplied" flag values. 
204 */ 205 #define RC_DEFAULT 0 206 #define RC_KEYWORD 1 207 #define RC_POSITIONAL 2 208 209 /* 210 * ufs hole 211 */ 212 #define UFS_HOLE -1 213 214 #ifndef STANDALONE 215 #include <stdio.h> 216 #include <sys/mnttab.h> 217 #endif 218 219 #include <stdlib.h> 220 #include <unistd.h> 221 #include <malloc.h> 222 #include <string.h> 223 #include <strings.h> 224 #include <ctype.h> 225 #include <errno.h> 226 #include <sys/param.h> 227 #include <time.h> 228 #include <sys/types.h> 229 #include <sys/sysmacros.h> 230 #include <sys/vnode.h> 231 #include <sys/fs/ufs_fsdir.h> 232 #include <sys/fs/ufs_inode.h> 233 #include <sys/fs/ufs_fs.h> 234 #include <sys/fs/ufs_log.h> 235 #include <sys/mntent.h> 236 #include <sys/filio.h> 237 #include <limits.h> 238 #include <sys/int_const.h> 239 #include <signal.h> 240 #include <sys/efi_partition.h> 241 #include "roll_log.h" 242 243 #define bcopy(f, t, n) (void) memcpy(t, f, n) 244 #define bzero(s, n) (void) memset(s, 0, n) 245 #define bcmp(s, d, n) memcmp(s, d, n) 246 247 #define index(s, r) strchr(s, r) 248 #define rindex(s, r) strrchr(s, r) 249 250 #include <sys/stat.h> 251 #include <sys/statvfs.h> 252 #include <locale.h> 253 #include <fcntl.h> 254 #include <sys/isa_defs.h> /* for ENDIAN defines */ 255 #include <sys/vtoc.h> 256 257 #include <sys/dkio.h> 258 #include <sys/asynch.h> 259 260 extern offset_t llseek(); 261 extern char *getfullblkname(); 262 extern long lrand48(); 263 264 extern int optind; 265 extern char *optarg; 266 267 268 /* 269 * The size of a cylinder group is calculated by CGSIZE. The maximum size 270 * is limited by the fact that cylinder groups are at most one block. 271 * Its size is derived from the size of the maps maintained in the 272 * cylinder group and the (struct cg) size. 
273 */ 274 #define CGSIZE(fs) \ 275 /* base cg */ (sizeof (struct cg) + \ 276 /* blktot size */ (fs)->fs_cpg * sizeof (long) + \ 277 /* blks size */ (fs)->fs_cpg * (fs)->fs_nrpos * sizeof (short) + \ 278 /* inode map */ howmany((fs)->fs_ipg, NBBY) + \ 279 /* block map */ howmany((fs)->fs_cpg * (fs)->fs_spc / NSPF(fs), NBBY)) 280 281 /* 282 * We limit the size of the inode map to be no more than a 283 * third of the cylinder group space, since we must leave at 284 * least an equal amount of space for the block map. 285 * 286 * N.B.: MAXIpG must be a multiple of INOPB(fs). 287 */ 288 #define MAXIpG(fs) roundup((fs)->fs_bsize * NBBY / 3, INOPB(fs)) 289 290 /* 291 * Same as MAXIpG, but parameterized by the block size (b) and the 292 * cylinder group divisor (d), which is the reciprocal of the fraction of the 293 * cylinder group overhead block that is used for the inode map. So for 294 * example, if d = 5, the macro's computation assumes that 1/5 of the 295 * cylinder group overhead block can be dedicated to the inode map. 296 */ 297 #define MAXIpG_B(b, d) roundup((b) * NBBY / (d), (b) / sizeof (struct dinode)) 298 299 #define UMASK 0755 300 #define MAXINOPB (MAXBSIZE / sizeof (struct dinode)) 301 #define POWEROF2(num) (((num) & ((num) - 1)) == 0) 302 #define MB (1024*1024) 303 #define BETWEEN(x, l, h) ((x) >= (l) && (x) <= (h)) 304 305 /* 306 * Used to set the inode generation number. Since both inodes and dinodes 307 * are dealt with, we really need a pointer to an icommon here. 
308 */ 309 #define IRANDOMIZE(icp) (icp)->ic_gen = lrand48(); 310 311 /* 312 * Flags for number() 313 */ 314 #define ALLOW_PERCENT 0x01 /* allow trailing `%' on number */ 315 #define ALLOW_MS1 0x02 /* allow trailing `ms', state 1 */ 316 #define ALLOW_MS2 0x04 /* allow trailing `ms', state 2 */ 317 #define ALLOW_END_ONLY 0x08 /* must be at end of number & suffixes */ 318 319 #define MAXAIO 1000 /* maximum number of outstanding I/O's we'll manage */ 320 #define BLOCK 1 /* block in aiowait */ 321 #define NOBLOCK 0 /* don't block in aiowait */ 322 323 #define RELEASE 1 /* free an aio buffer after use */ 324 #define SAVE 0 /* don't free the buffer */ 325 326 typedef struct aio_trans { 327 aio_result_t resultbuf; 328 diskaddr_t bno; 329 char *buffer; 330 int size; 331 int release; 332 struct aio_trans *next; 333 } aio_trans; 334 335 typedef struct aio_results { 336 int max; 337 int outstanding; 338 int maxpend; 339 aio_trans *trans; 340 } aio_results; 341 342 int aio_inited = 0; 343 aio_results results; 344 345 /* 346 * Allow up to MAXBUF aio requests that each have a unique buffer. 347 * More aio's might be done, but not using memory through the getbuf() 348 * interface. This can be raised, but you run into the potential of 349 * using more memory than is physically available on the machine, 350 * and if you start swapping, you can forget about performance. 351 * To prevent this, we also limit the total memory used for a given 352 * type of buffer to MAXBUFMEM. 353 * 354 * Tests indicate a cylinder group's worth of inodes takes: 355 * 356 * NBPI Size of Inode Buffer 357 * 2k 1688k 358 * 8k 424k 359 * 360 * initcg() stores all the inodes for a cylinder group in one buffer, 361 * so allowing 20 buffers could take 32 MB if not limited by MAXBUFMEM. 
362 */ 363 #define MAXBUF 20 364 #define MAXBUFMEM (8 * 1024 * 1024) 365 366 /* 367 * header information for buffers managed by getbuf() and freebuf() 368 */ 369 typedef struct bufhdr { 370 struct bufhdr *head; 371 struct bufhdr *next; 372 } bufhdr; 373 374 int bufhdrsize; 375 376 bufhdr inodebuf = { NULL, NULL }; 377 bufhdr cgsumbuf = { NULL, NULL }; 378 379 #define SECTORS_PER_TERABYTE (1LL << 31) 380 /* 381 * The following constant specifies an upper limit for file system size 382 * that is actually a lot bigger than we expect to support with UFS. (Since 383 * it's specified in sectors, the file system size would be 2**44 * 512, 384 * which is 2**53, which is 8192 Terabytes.) However, it's useful 385 * for checking the basic sanity of a size value that is input on the 386 * command line. 387 */ 388 #define FS_SIZE_UPPER_LIMIT 0x100000000000LL 389 390 /* 391 * Forward declarations 392 */ 393 static char *getbuf(bufhdr *bufhead, int size); 394 static void freebuf(char *buf); 395 static void freetrans(aio_trans *transp); 396 static aio_trans *get_aiop(); 397 static aio_trans *wait_for_write(int block); 398 static void initcg(int cylno); 399 static void fsinit(); 400 static int makedir(struct direct *protodir, int entries); 401 static void iput(struct inode *ip); 402 static void rdfs(diskaddr_t bno, int size, char *bf); 403 static void wtfs(diskaddr_t bno, int size, char *bf); 404 static void awtfs(diskaddr_t bno, int size, char *bf, int release); 405 static void wtfs_breakup(diskaddr_t bno, int size, char *bf); 406 static int isblock(struct fs *fs, unsigned char *cp, int h); 407 static void clrblock(struct fs *fs, unsigned char *cp, int h); 408 static void setblock(struct fs *fs, unsigned char *cp, int h); 409 static void usage(); 410 static void dump_fscmd(char *fsys, int fsi); 411 static uint64_t number(uint64_t d_value, char *param, int flags); 412 static int match(char *s); 413 static char checkopt(char *optim); 414 static char checkmtb(char *mtbarg); 415 
static void range_check(long *varp, char *name, long minimum, 416 long maximum, long def_val, int user_supplied); 417 static void range_check_64(uint64_t *varp, char *name, uint64_t minimum, 418 uint64_t maximum, uint64_t def_val, int user_supplied); 419 static daddr32_t alloc(int size, int mode); 420 static diskaddr_t get_max_size(int fd); 421 static long get_max_track_size(int fd); 422 static void block_sigint(sigset_t *old_mask); 423 static void unblock_sigint(sigset_t *old_mask); 424 static void recover_from_sigint(int signum); 425 static int confirm_abort(void); 426 static int getline(FILE *fp, char *loc, int maxlen); 427 static void flush_writes(void); 428 static long compute_maxcpg(long, long, long, long, long); 429 static int in_64bit_mode(void); 430 static int validate_size(int fd, diskaddr_t size); 431 static void dump_sblock(void); 432 433 /* 434 * Workaround for mkfs to function properly on disks attached to XMIT 2.X 435 * controller. If the address is not aligned at 8 byte boundary, mkfs on 436 * disks attached to XMIT 2.X controller exhibits unpredictable behaviour. 437 */ 438 #define XMIT_2_X_ALIGN 8 439 #pragma align XMIT_2_X_ALIGN(fsun, altfsun, cgun) 440 441 union { 442 struct fs fs; 443 char pad[SBSIZE]; 444 } fsun, altfsun; 445 #define sblock fsun.fs 446 #define altsblock altfsun.fs 447 448 struct csum *fscs; 449 450 union cgun { 451 struct cg cg; 452 char pad[MAXBSIZE]; 453 } cgun; 454 455 #define acg cgun.cg 456 /* 457 * Size of screen in cols in which to fit output 458 */ 459 #define WIDTH 80 460 461 struct dinode zino[MAXBSIZE / sizeof (struct dinode)]; 462 463 /* 464 * file descriptors used for rdfs(fsi) and wtfs(fso). 465 * Initialized to an illegal file descriptor number. 466 */ 467 int fsi = -1; 468 int fso = -1; 469 470 /* 471 * The BIG parameter is machine dependent. It should be a longlong integer 472 * constant that can be used by the number parser to check the validity 473 * of numeric parameters.
474 */ 475 476 #define BIG 0x7fffffffffffffffLL 477 478 /* Used to indicate to number() that a bogus value should cause us to exit */ 479 #define NO_DEFAULT LONG_MIN 480 481 /* 482 * INVALIDSBLIMIT is the number of bad backup superblocks that will be 483 * tolerated before we decide to try arriving at a different set of them 484 * using a different logic. This is applicable for non-EFI disks only. 485 */ 486 #define INVALIDSBLIMIT 10 487 488 /* 489 * The *_flag variables are used to indicate that the user specified 490 * the values, rather than that we made them up ourselves. We can 491 * complain about the user giving us bogus values. 492 */ 493 494 /* semi-constants */ 495 long sectorsize = DEV_BSIZE; /* bytes/sector from param.h */ 496 long bbsize = BBSIZE; /* boot block size */ 497 long sbsize = SBSIZE; /* superblock size */ 498 499 /* parameters */ 500 diskaddr_t fssize_db; /* file system size in disk blocks */ 501 diskaddr_t fssize_frag; /* file system size in frags */ 502 long cpg; /* cylinders/cylinder group */ 503 int cpg_flag = RC_DEFAULT; 504 long rotdelay = -1; /* rotational delay between blocks */ 505 int rotdelay_flag = RC_DEFAULT; 506 long maxcontig; /* max contiguous blocks to allocate */ 507 int maxcontig_flag = RC_DEFAULT; 508 long nsect = DFLNSECT; /* sectors per track */ 509 int nsect_flag = RC_DEFAULT; 510 long ntrack = DFLNTRAK; /* tracks per cylinder */ 511 int ntrack_flag = RC_DEFAULT; 512 long bsize = DESBLKSIZE; /* filesystem block size */ 513 int bsize_flag = RC_DEFAULT; 514 long fragsize = DESFRAGSIZE; /* filesystem fragment size */ 515 int fragsize_flag = RC_DEFAULT; 516 long minfree = MINFREE; /* fs_minfree */ 517 int minfree_flag = RC_DEFAULT; 518 long rps = DEFHZ; /* revolutions/second of drive */ 519 int rps_flag = RC_DEFAULT; 520 long nbpi = NBPI; /* number of bytes per inode */ 521 int nbpi_flag = RC_DEFAULT; 522 long nrpos = NRPOS; /* number of rotational positions */ 523 int nrpos_flag = RC_DEFAULT; 524 long apc = 0; /*
alternate sectors per cylinder */ 525 int apc_flag = RC_DEFAULT; 526 char opt = 't'; /* optimization style, `t' or `s' */ 527 char mtb = 'n'; /* multi-terabyte format, 'y' or 'n' */ 528 529 long debug = 0; /* enable debugging output */ 530 531 int spc_flag = 0; /* alternate sectors specified or */ 532 /* found */ 533 534 /* global state */ 535 int Nflag; /* do not write to disk */ 536 int mflag; /* return the command line used to create this FS */ 537 int rflag; /* report the superblock in an easily-parsed form */ 538 int Rflag; /* dump the superblock in binary */ 539 char *fsys; 540 time_t mkfstime; 541 char *string; 542 int label_type; 543 544 /* 545 * logging support 546 */ 547 int ismdd; /* true if device is a SVM device */ 548 int islog; /* true if ufs or SVM logging is enabled */ 549 int islogok; /* true if ufs/SVM log state is good */ 550 551 static int isufslog; /* true if ufs logging is enabled */ 552 static int waslog; /* true when ufs logging disabled during grow */ 553 554 /* 555 * growfs defines, globals, and forward references 556 */ 557 #define NOTENOUGHSPACE 33 558 int grow; 559 static int Pflag; /* probe to which size the fs can be grown */ 560 int ismounted; 561 char *directory; 562 diskaddr_t grow_fssize; 563 long grow_fs_size; 564 long grow_fs_ncg; 565 diskaddr_t grow_fs_csaddr; 566 long grow_fs_cssize; 567 int grow_fs_clean; 568 struct csum *grow_fscs; 569 diskaddr_t grow_sifrag; 570 int test; 571 int testforce; 572 diskaddr_t testfrags; 573 int inlockexit; 574 int isbad; 575 576 void lockexit(int); 577 void randomgeneration(void); 578 void checksummarysize(void); 579 int checksblock(struct fs, int); 580 void growinit(char *); 581 void checkdev(char *, char *); 582 void checkmount(struct mnttab *, char *); 583 struct dinode *gdinode(ino_t); 584 int csfraginrange(daddr32_t); 585 struct csfrag *findcsfrag(daddr32_t, struct csfrag **); 586 void checkindirect(ino_t, daddr32_t *, daddr32_t, int); 587 void addcsfrag(ino_t, daddr32_t, struct csfrag 
**); 588 void delcsfrag(daddr32_t, struct csfrag **); 589 void checkdirect(ino_t, daddr32_t *, daddr32_t *, int); 590 void findcsfragino(void); 591 void fixindirect(daddr32_t, int); 592 void fixdirect(caddr_t, daddr32_t, daddr32_t *, int); 593 void fixcsfragino(void); 594 void extendsummaryinfo(void); 595 int notenoughspace(void); 596 void unalloccsfragino(void); 597 void unalloccsfragfree(void); 598 void findcsfragfree(void); 599 void copycsfragino(void); 600 void rdcg(long); 601 void wtcg(void); 602 void flcg(void); 603 void allocfrags(long, daddr32_t *, long *); 604 void alloccsfragino(void); 605 void alloccsfragfree(void); 606 void freefrags(daddr32_t, long, long); 607 int findfreerange(long *, long *); 608 void resetallocinfo(void); 609 void extendcg(long); 610 void ulockfs(void); 611 void wlockfs(void); 612 void clockfs(void); 613 void wtsb(void); 614 static int64_t checkfragallocated(daddr32_t); 615 static struct csum *read_summaryinfo(struct fs *); 616 static diskaddr_t probe_summaryinfo(); 617 618 int 619 main(int argc, char *argv[]) 620 { 621 long i, mincpc, mincpg, ibpcl; 622 long cylno, rpos, blk, j, warn = 0; 623 long mincpgcnt, maxcpg; 624 uint64_t used, bpcg, inospercg; 625 long mapcramped, inodecramped; 626 long postblsize, rotblsize, totalsbsize; 627 FILE *mnttab; 628 struct mnttab mntp; 629 char *special; 630 struct statvfs64 fs; 631 struct dk_geom dkg; 632 struct dk_cinfo dkcinfo; 633 char pbuf[sizeof (uint64_t) * 3 + 1]; 634 char *tmpbuf; 635 int width, plen; 636 uint64_t num; 637 int c, saverr; 638 diskaddr_t max_fssize; 639 long tmpmaxcontig = -1; 640 struct sigaction sigact; 641 uint64_t nbytes64; 642 int remaining_cg; 643 int do_dot = 0; 644 int use_efi_dflts = 0, retry = 0; 645 int invalid_sb_cnt, ret, skip_this_sb, cg_too_small; 646 int geom_nsect, geom_ntrack, geom_cpg; 647 648 (void) setlocale(LC_ALL, ""); 649 650 #if !defined(TEXT_DOMAIN) 651 #define TEXT_DOMAIN "SYS_TEST" 652 #endif 653 (void) textdomain(TEXT_DOMAIN); 654 655 while ((c 
= getopt(argc, argv, "F:bmo:VPGM:T:t:")) != EOF) { 656 switch (c) { 657 658 case 'F': 659 string = optarg; 660 if (strcmp(string, "ufs") != 0) 661 usage(); 662 break; 663 664 case 'm': /* return command line used to create this FS */ 665 mflag++; 666 break; 667 668 case 'o': 669 /* 670 * ufs specific options. 671 */ 672 string = optarg; 673 while (*string != '\0') { 674 if (match("nsect=")) { 675 nsect = number(DFLNSECT, "nsect", 0); 676 nsect_flag = RC_KEYWORD; 677 } else if (match("ntrack=")) { 678 ntrack = number(DFLNTRAK, "ntrack", 0); 679 ntrack_flag = RC_KEYWORD; 680 } else if (match("bsize=")) { 681 bsize = number(DESBLKSIZE, "bsize", 0); 682 bsize_flag = RC_KEYWORD; 683 } else if (match("fragsize=")) { 684 fragsize = number(DESFRAGSIZE, 685 "fragsize", 0); 686 fragsize_flag = RC_KEYWORD; 687 } else if (match("cgsize=")) { 688 cpg = number(DESCPG, "cgsize", 0); 689 cpg_flag = RC_KEYWORD; 690 } else if (match("free=")) { 691 minfree = number(MINFREE, "free", 692 ALLOW_PERCENT); 693 minfree_flag = RC_KEYWORD; 694 } else if (match("maxcontig=")) { 695 tmpmaxcontig = 696 number(-1, "maxcontig", 0); 697 maxcontig_flag = RC_KEYWORD; 698 } else if (match("nrpos=")) { 699 nrpos = number(NRPOS, "nrpos", 0); 700 nrpos_flag = RC_KEYWORD; 701 } else if (match("rps=")) { 702 rps = number(DEFHZ, "rps", 0); 703 rps_flag = RC_KEYWORD; 704 } else if (match("nbpi=")) { 705 nbpi = number(NBPI, "nbpi", 0); 706 nbpi_flag = RC_KEYWORD; 707 } else if (match("opt=")) { 708 opt = checkopt(string); 709 } else if (match("mtb=")) { 710 mtb = checkmtb(string); 711 } else if (match("apc=")) { 712 apc = number(0, "apc", 0); 713 apc_flag = RC_KEYWORD; 714 } else if (match("gap=")) { 715 (void) number(0, "gap", ALLOW_MS1); 716 rotdelay = ROTDELAY; 717 rotdelay_flag = RC_DEFAULT; 718 } else if (match("debug=")) { 719 debug = number(0, "debug", 0); 720 } else if (match("N")) { 721 Nflag++; 722 } else if (match("calcsb")) { 723 rflag++; 724 Nflag++; 725 } else if (match("calcbinsb")) { 726 
rflag++; 727 Rflag++; 728 Nflag++; 729 } else if (*string == '\0') { 730 break; 731 } else { 732 (void) fprintf(stderr, gettext( 733 "illegal option: %s\n"), 734 string); 735 usage(); 736 } 737 738 if (*string == ',') string++; 739 if (*string == ' ') string++; 740 } 741 break; 742 743 case 'V': 744 { 745 char *opt_text; 746 int opt_count; 747 748 (void) fprintf(stdout, gettext("mkfs -F ufs ")); 749 for (opt_count = 1; opt_count < argc; 750 opt_count++) { 751 opt_text = argv[opt_count]; 752 if (opt_text) 753 (void) fprintf(stdout, " %s ", 754 opt_text); 755 } 756 (void) fprintf(stdout, "\n"); 757 } 758 break; 759 760 case 'b': /* do nothing for this */ 761 break; 762 763 case 'M': /* grow the mounted file system */ 764 directory = optarg; 765 766 /* FALLTHROUGH */ 767 case 'G': /* grow the file system */ 768 grow = 1; 769 break; 770 case 'P': /* probe the file system growing size */ 771 Pflag = 1; 772 grow = 1; /* probe mode implies fs growing */ 773 break; 774 case 'T': /* For testing */ 775 testforce = 1; 776 777 /* FALLTHROUGH */ 778 case 't': 779 test = 1; 780 string = optarg; 781 testfrags = number(NO_DEFAULT, "testfrags", 0); 782 break; 783 784 case '?': 785 usage(); 786 break; 787 } 788 } 789 #ifdef MKFS_DEBUG 790 /* 791 * Turning on MKFS_DEBUG causes mkfs to produce a filesystem 792 * that can be reproduced by setting the time to 0 and seeding 793 * the random number generator to a constant. 
794 */ 795 mkfstime = 0; /* reproducible results */ 796 #else 797 (void) time(&mkfstime); 798 #endif 799 800 if (optind >= (argc - 1)) { 801 if (optind > (argc - 1)) { 802 (void) fprintf(stderr, 803 gettext("special not specified\n")); 804 usage(); 805 } else if (mflag == 0) { 806 (void) fprintf(stderr, 807 gettext("size not specified\n")); 808 usage(); 809 } 810 } 811 argc -= optind; 812 argv = &argv[optind]; 813 814 fsys = argv[0]; 815 fsi = open64(fsys, O_RDONLY); 816 if (fsi < 0) { 817 (void) fprintf(stderr, gettext("%s: cannot open\n"), fsys); 818 lockexit(32); 819 } 820 821 if (mflag) { 822 dump_fscmd(fsys, fsi); 823 lockexit(0); 824 } 825 826 /* 827 * The task of setting all of the configuration parameters for a 828 * UFS file system is basically a matter of solving n equations 829 * in m variables. Typically, m is greater than n, so there is 830 * usually more than one valid solution. Since this is usually 831 * an under-constrained problem, it's not always obvious what the 832 * "best" configuration is. 833 * 834 * In general, the approach is to 835 * 1. Determine the values for the file system parameters 836 * that are externally constrained and therefore not adjustable 837 * by mkfs (such as the device's size and maxtransfer size). 838 * 2. Acquire the user's requested setting for all configuration 839 * values that can be set on the command line. 840 * 3. Determine the final value of all configuration values, by 841 * the following approach: 842 * - set the file system block size (fs_bsize). Although 843 * this could be regarded as an adjustable parameter, in 844 * fact, it's pretty much a constant. At this time, it's 845 * generally set to 8k (with older hardware, it can 846 * sometimes make sense to set it to 4k, but those 847 * situations are pretty rare now). 848 * - re-adjust the maximum file system size based on the 849 * value of the file system block size.
Since the 850 * frag size can't be any larger than a file system 851 * block, and the number of frags in the file system 852 * has to fit into 31 bits, the file system block size 853 * affects the maximum file system size. 854 * - now that the real maximum file system size is known, set the 855 * actual size of the file system to be created to 856 * MIN(requested size, maximum file system size). 857 * - now validate, and if necessary, adjust the following 858 * values: 859 * rotdelay 860 * nsect 861 * maxcontig 862 * apc 863 * frag_size 864 * rps 865 * minfree 866 * nrpos 867 * ntrack 868 * nbpi 869 * - calculate maxcpg (the maximum value of the cylinders-per- 870 * cylinder-group configuration parameters). There are two 871 * algorithms for calculating maxcpg: an old one, which is 872 * used for file systems of less than 1 terabyte, and a 873 * new one, implemented in the function compute_maxcpg(), 874 * which is used for file systems of greater than 1 TB. 875 * The difference between them is that compute_maxcpg() 876 * really tries to maximize the cpg value. The old 877 * algorithm fails to take advantage of smaller frags and 878 * lower inode density when determining the maximum cpg, 879 * and thus comes up with much lower numbers in some 880 * configurations. At some point, we might use the 881 * new algorithm for determining maxcpg for all file 882 * systems, but at this time, the changes implemented for 883 * multi-terabyte UFS are NOT being automatically applied 884 * to UFS file systems of less than a terabyte (in the 885 * interest of not changing existing UFS policy too much 886 * until the ramifications of the changes are well-understood 887 * and have been evaluated for their effects on performance.) 888 * - check the current values of the configuration parameters 889 * against the various constraints imposed by UFS. These 890 * include: 891 * * There must be at least one inode in each 892 * cylinder group.
893 * * The cylinder group overhead block, which 894 * contains the inode and frag bitmaps, must fit 895 * within one file system block. 896 * * The space required for inode maps should 897 * occupy no more than a third of the cylinder 898 * group overhead block. 899 * * The rotational position tables have to fit 900 * within the available space in the super block. 901 * Adjust the configuration values that can be adjusted 902 * so that these constraints are satisfied. The 903 * configuration values that are adjustable are: 904 * * frag size 905 * * cylinders per group 906 * * inode density (can be increased) 907 * * number of rotational positions (the rotational 908 * position tables are eliminated altogether if 909 * there isn't enough room for them.) 910 * 4. Set the values for all the dependent configuration 911 * values (those that aren't settable on the command 912 * line and which are completely dependent on the 913 * adjustable parameters). This includes cpc (cycles 914 * per cylinder), spc (sectors-per-cylinder), and many others. 915 */ 916 917 max_fssize = get_max_size(fsi); 918 919 /* 920 * Get and check positional arguments, if any.
921 */ 922 switch (argc - 1) { 923 default: 924 usage(); 925 /*NOTREACHED*/ 926 case 15: 927 mtb = checkmtb(argv[15]); 928 /* FALLTHROUGH */ 929 case 14: 930 string = argv[14]; 931 tmpmaxcontig = number(-1, "maxcontig", 0); 932 maxcontig_flag = RC_POSITIONAL; 933 /* FALLTHROUGH */ 934 case 13: 935 string = argv[13]; 936 nrpos = number(NRPOS, "nrpos", 0); 937 nrpos_flag = RC_POSITIONAL; 938 /* FALLTHROUGH */ 939 case 12: 940 string = argv[12]; 941 rotdelay = ROTDELAY; 942 rotdelay_flag = RC_DEFAULT; 943 /* FALLTHROUGH */ 944 case 11: 945 string = argv[11]; 946 apc = number(0, "apc", 0); 947 apc_flag = RC_POSITIONAL; 948 /* FALLTHROUGH */ 949 case 10: 950 opt = checkopt(argv[10]); 951 /* FALLTHROUGH */ 952 case 9: 953 string = argv[9]; 954 nbpi = number(NBPI, "nbpi", 0); 955 nbpi_flag = RC_POSITIONAL; 956 /* FALLTHROUGH */ 957 case 8: 958 string = argv[8]; 959 rps = number(DEFHZ, "rps", 0); 960 rps_flag = RC_POSITIONAL; 961 /* FALLTHROUGH */ 962 case 7: 963 string = argv[7]; 964 minfree = number(MINFREE, "free", ALLOW_PERCENT); 965 minfree_flag = RC_POSITIONAL; 966 /* FALLTHROUGH */ 967 case 6: 968 string = argv[6]; 969 cpg = number(DESCPG, "cgsize", 0); 970 cpg_flag = RC_POSITIONAL; 971 /* FALLTHROUGH */ 972 case 5: 973 string = argv[5]; 974 fragsize = number(DESFRAGSIZE, "fragsize", 0); 975 fragsize_flag = RC_POSITIONAL; 976 /* FALLTHROUGH */ 977 case 4: 978 string = argv[4]; 979 bsize = number(DESBLKSIZE, "bsize", 0); 980 bsize_flag = RC_POSITIONAL; 981 /* FALLTHROUGH */ 982 case 3: 983 string = argv[3]; 984 ntrack = number(DFLNTRAK, "ntrack", 0); 985 ntrack_flag = RC_POSITIONAL; 986 /* FALLTHROUGH */ 987 case 2: 988 string = argv[2]; 989 nsect = number(DFLNSECT, "nsect", 0); 990 nsect_flag = RC_POSITIONAL; 991 /* FALLTHROUGH */ 992 case 1: 993 string = argv[1]; 994 fssize_db = number(max_fssize, "size", 0); 995 } 996 997 998 if ((maxcontig_flag == RC_DEFAULT) || (tmpmaxcontig == -1) || 999 (maxcontig == -1)) { 1000 long maxtrax = get_max_track_size(fsi); 1001 
maxcontig = maxtrax / bsize; 1002 1003 } else { 1004 maxcontig = tmpmaxcontig; 1005 } 1006 dprintf(("DeBuG maxcontig : %ld\n", maxcontig)); 1007 1008 if (rotdelay == -1) { /* default by newfs and mkfs */ 1009 rotdelay = ROTDELAY; 1010 } 1011 1012 if (cpg_flag == RC_DEFAULT) { /* If not explicity set, use default */ 1013 cpg = DESCPG; 1014 } 1015 dprintf(("DeBuG cpg : %ld\n", cpg)); 1016 1017 /* 1018 * Now that we have the semi-sane args, either positional, via -o, 1019 * or by defaulting, handle inter-dependencies and range checks. 1020 */ 1021 1022 /* 1023 * Settle the file system block size first, since it's a fixed 1024 * parameter once set and so many other parameters, including 1025 * max_fssize, depend on it. 1026 */ 1027 range_check(&bsize, "bsize", MINBSIZE, MAXBSIZE, DESBLKSIZE, 1028 bsize_flag); 1029 1030 if (!POWEROF2(bsize)) { 1031 (void) fprintf(stderr, 1032 gettext("block size must be a power of 2, not %ld\n"), 1033 bsize); 1034 bsize = DESBLKSIZE; 1035 (void) fprintf(stderr, 1036 gettext("mkfs: bsize reset to default %ld\n"), 1037 bsize); 1038 } 1039 1040 if (fssize_db > max_fssize && validate_size(fsi, fssize_db)) { 1041 (void) fprintf(stderr, gettext( 1042 "Warning: the requested size of this file system\n" 1043 "(%lld sectors) is greater than the size of the\n" 1044 "device reported by the driver (%lld sectors).\n" 1045 "However, a read of the device at the requested size\n" 1046 "does succeed, so the requested size will be used.\n"), 1047 fssize_db, max_fssize); 1048 max_fssize = fssize_db; 1049 } 1050 /* 1051 * Since the maximum allocatable unit (the frag) must be less than 1052 * or equal to bsize, and the number of frags must be less than or 1053 * equal to INT_MAX, the total size of the file system (in 1054 * bytes) must be less than or equal to bsize * INT_MAX. 
1055 */ 1056 1057 if (max_fssize > ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX) 1058 max_fssize = ((diskaddr_t)bsize/DEV_BSIZE) * INT_MAX; 1059 range_check_64(&fssize_db, "size", 1024LL, max_fssize, max_fssize, 1); 1060 1061 if (fssize_db >= SECTORS_PER_TERABYTE) { 1062 mtb = 'y'; 1063 if (!in_64bit_mode()) { 1064 (void) fprintf(stderr, gettext( 1065 "mkfs: Warning: Creating a file system greater than 1 terabyte on a\n" 1066 " system running a 32-bit kernel. This file system will not be\n" 1067 " accessible until the system is rebooted with a 64-bit kernel.\n")); 1068 } 1069 } 1070 1071 /* 1072 * With newer and much larger disks, the newfs(1M) and mkfs_ufs(1M) 1073 * commands had problems in correctly handling the "native" geometries 1074 * for various storage devices. 1075 * 1076 * To handle the new age disks, mkfs_ufs(1M) will use the EFI style 1077 * for non-EFI disks that are larger than the CHS addressing limit 1078 * ( > 8GB approx ) and ignore the disk geometry information for 1079 * these drives. This is what is currently done for multi-terrabyte 1080 * filesystems on EFI disks. 1081 * 1082 * However if the user asked for a specific layout by supplying values 1083 * for even one of the three parameters (nsect, ntrack, cpg), honour 1084 * the user supplied parameters. 1085 * 1086 * Choosing EFI style or native geometry style can make a lot of 1087 * difference, because the size of a cylinder group is dependent on 1088 * this choice. This in turn means that the position of alternate 1089 * superblocks varies depending on the style chosen. It is not 1090 * necessary that all disks of size > CHSLIMIT have EFI style layout. 1091 * There can be disks which are > CHSLIMIT size, but have native 1092 * geometry style layout, thereby warranting the need for alternate 1093 * logic in superblock detection. 
1094 */ 1095 1096 if (mtb != 'y' && label_type == LABEL_TYPE_VTOC && 1097 ((ntrack == -1 || (grow && ntrack_flag == RC_DEFAULT)) || 1098 (nsect_flag == RC_DEFAULT && ntrack_flag == RC_DEFAULT && 1099 cpg_flag == RC_DEFAULT))) { 1100 /* 1101 * "-1" indicates that we were called from newfs and ntracks 1102 * was not specified in newfs command line. Calculate nsect 1103 * and ntrack in the same manner as newfs. 1104 * 1105 * This is required because, the defaults for nsect and ntrack 1106 * is hardcoded in mkfs, whereas to generate the alternate 1107 * superblock locations for the -N option, there is a need for 1108 * the geometry based values that newfs would have arrived at. 1109 * Newfs would have arrived at these values as below. 1110 */ 1111 1112 if (ioctl(fsi, DKIOCGGEOM, &dkg)) { 1113 dprintf(("%s: Unable to read Disk geometry", fsys)); 1114 perror(gettext("Unable to read Disk geometry")); 1115 lockexit(32); 1116 } else { 1117 nsect = dkg.dkg_nsect; 1118 ntrack = dkg.dkg_nhead; 1119 #ifdef i386 /* Bug 1170182 */ 1120 if (ntrack > 32 && (ntrack % 16) != 0) { 1121 ntrack -= (ntrack % 16); 1122 } 1123 #endif 1124 if ((dkg.dkg_ncyl * dkg.dkg_nhead * dkg.dkg_nsect) 1125 > CHSLIMIT) { 1126 use_efi_dflts = 1; 1127 retry = 1; 1128 } 1129 } 1130 dprintf(("DeBuG CHSLIMIT = %d geom = %ld\n", CHSLIMIT, 1131 dkg.dkg_ncyl * dkg.dkg_nhead * dkg.dkg_nsect)); 1132 } 1133 1134 /* 1135 * For the newfs -N case, even if the disksize is > CHSLIMIT, do not 1136 * blindly follow EFI style. If the fs_version indicates a geometry 1137 * based layout, try that one first. If it fails we can always try the 1138 * other logic. 1139 * 1140 * If we were called from growfs, we will have a problem if we mix 1141 * and match the filesystem creation and growth styles. For example, 1142 * if we create using EFI style, we have to also grow using EFI 1143 * style. So follow the style indicated by the fs_version. 1144 * 1145 * Read and verify the primary superblock. 
If it looks sane, use the 1146 * fs_version from the superblock. If the primary superblock does 1147 * not look good, read and verify the first alternate superblock at 1148 * ALTSB. Use the fs_version to decide whether to use the 1149 * EFI style logic or the old geometry based logic to calculate 1150 * the alternate superblock locations. 1151 */ 1152 if ((Nflag && use_efi_dflts) || (grow)) { 1153 if (grow && ntrack_flag != RC_DEFAULT) 1154 goto start_fs_creation; 1155 rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, 1156 (char *)&altsblock); 1157 ret = checksblock(altsblock, 1); 1158 1159 if (!ret) { 1160 if (altsblock.fs_magic == MTB_UFS_MAGIC) { 1161 mtb = 'y'; 1162 goto start_fs_creation; 1163 } 1164 use_efi_dflts = (altsblock.fs_version == 1165 UFS_EFISTYLE4NONEFI_VERSION_2) ? 1 : 0; 1166 } else { 1167 /* 1168 * The primary superblock didn't help in determining 1169 * the fs_version. Try the first alternate superblock. 1170 */ 1171 dprintf(("DeBuG checksblock() failed - error : %d" 1172 " for sb : %d\n", ret, SBOFF/sectorsize)); 1173 rdfs((diskaddr_t)ALTSB, (int)sbsize, 1174 (char *)&altsblock); 1175 ret = checksblock(altsblock, 1); 1176 1177 if (!ret) { 1178 if (altsblock.fs_magic == MTB_UFS_MAGIC) { 1179 mtb = 'y'; 1180 goto start_fs_creation; 1181 } 1182 use_efi_dflts = (altsblock.fs_version == 1183 UFS_EFISTYLE4NONEFI_VERSION_2) ? 
1 : 0; 1184 } 1185 dprintf(("DeBuG checksblock() returned : %d" 1186 " for sb : %d\n", ret, ALTSB)); 1187 } 1188 } 1189 1190 geom_nsect = nsect; 1191 geom_ntrack = ntrack; 1192 geom_cpg = cpg; 1193 dprintf(("DeBuG geom_nsect=%d, geom_ntrack=%d, geom_cpg=%d\n", 1194 geom_nsect, geom_ntrack, geom_cpg)); 1195 1196 start_fs_creation: 1197 retry_alternate_logic: 1198 invalid_sb_cnt = 0; 1199 cg_too_small = 0; 1200 if (use_efi_dflts) { 1201 nsect = DEF_SECTORS_EFI; 1202 ntrack = DEF_TRACKS_EFI; 1203 cpg = DESCPG; 1204 dprintf(("\nDeBuG Using EFI defaults\n")); 1205 } else { 1206 nsect = geom_nsect; 1207 ntrack = geom_ntrack; 1208 cpg = geom_cpg; 1209 dprintf(("\nDeBuG Using Geometry\n")); 1210 /* 1211 * 32K based on max block size of 64K, and rotational layout 1212 * test of nsect <= (256 * sectors/block). Current block size 1213 * limit is not 64K, but it's growing soon. 1214 */ 1215 range_check(&nsect, "nsect", 1, 32768, DFLNSECT, nsect_flag); 1216 /* 1217 * ntrack is the number of tracks per cylinder. 1218 * The ntrack value must be between 1 and the total number of 1219 * sectors in the file system. 1220 */ 1221 range_check(&ntrack, "ntrack", 1, 1222 fssize_db > INT_MAX ? 
INT_MAX : (uint32_t)fssize_db, 1223 DFLNTRAK, ntrack_flag); 1224 } 1225 1226 range_check(&apc, "apc", 0, nsect - 1, 0, apc_flag); 1227 1228 if (mtb == 'y') 1229 fragsize = bsize; 1230 1231 range_check(&fragsize, "fragsize", sectorsize, bsize, 1232 MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize)), fragsize_flag); 1233 1234 if ((bsize / MAXFRAG) > fragsize) { 1235 (void) fprintf(stderr, gettext( 1236 "fragment size %ld is too small, minimum with block size %ld is %ld\n"), 1237 fragsize, bsize, bsize / MAXFRAG); 1238 (void) fprintf(stderr, 1239 gettext("mkfs: fragsize reset to minimum %ld\n"), 1240 bsize / MAXFRAG); 1241 fragsize = bsize / MAXFRAG; 1242 } 1243 1244 if (!POWEROF2(fragsize)) { 1245 (void) fprintf(stderr, 1246 gettext("fragment size must be a power of 2, not %ld\n"), 1247 fragsize); 1248 fragsize = MAX(bsize / MAXFRAG, MIN(DESFRAGSIZE, bsize)); 1249 (void) fprintf(stderr, 1250 gettext("mkfs: fragsize reset to %ld\n"), 1251 fragsize); 1252 } 1253 1254 /* At this point, bsize must be >= fragsize, so no need to check it */ 1255 1256 if (bsize < PAGESIZE) { 1257 (void) fprintf(stderr, gettext( 1258 "WARNING: filesystem block size (%ld) is smaller than " 1259 "memory page size (%ld).\nResulting filesystem can not be " 1260 "mounted on this system.\n\n"), 1261 bsize, (long)PAGESIZE); 1262 } 1263 1264 range_check(&rps, "rps", 1, 1000, DEFHZ, rps_flag); 1265 range_check(&minfree, "free", 0, 99, MINFREE, minfree_flag); 1266 range_check(&nrpos, "nrpos", 1, nsect, MIN(nsect, NRPOS), nrpos_flag); 1267 1268 /* 1269 * nbpi is variable, but 2MB seems a reasonable upper limit, 1270 * as 4MB tends to cause problems (using otherwise-default 1271 * parameters). The true limit is where we end up with one 1272 * inode per cylinder group. If this file system is being 1273 * configured for multi-terabyte access, nbpi must be at least 1MB. 
1274 */ 1275 if (mtb == 'y' && nbpi < MTB_NBPI) { 1276 (void) fprintf(stderr, gettext("mkfs: bad value for nbpi: " 1277 "must be at least 1048576 for multi-terabyte, " 1278 "nbpi reset to default 1048576\n")); 1279 nbpi = MTB_NBPI; 1280 } 1281 1282 if (mtb == 'y') 1283 range_check(&nbpi, "nbpi", MTB_NBPI, 2 * MB, MTB_NBPI, 1284 nbpi_flag); 1285 else 1286 range_check(&nbpi, "nbpi", DEV_BSIZE, 2 * MB, NBPI, nbpi_flag); 1287 1288 /* 1289 * maxcpg is another variably-limited parameter. Calculate 1290 * the limit based on what we've got for its dependent 1291 * variables. Effectively, it's how much space is left in the 1292 * superblock after all the other bits are accounted for. We 1293 * only fill in sblock fields so we can use MAXIpG. 1294 * 1295 * If the calculation of maxcpg below (for the mtb == 'n' 1296 * case) is changed, update newfs as well. 1297 * 1298 * For old-style, non-MTB format file systems, use the old 1299 * algorithm for calculating the maximum cylinder group size, 1300 * even though it limits the cylinder group more than necessary. 1301 * Since layout can affect performance, we don't want to change 1302 * the default layout for non-MTB file systems at this time. 1303 * However, for MTB file systems, use the new maxcpg calculation, 1304 * which really maxes out the cylinder group size. 1305 */ 1306 1307 sblock.fs_bsize = bsize; 1308 sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode); 1309 1310 if (mtb == 'n') { 1311 maxcpg = (bsize - sizeof (struct cg) - 1312 howmany(MAXIpG(&sblock), NBBY)) / 1313 (sizeof (long) + nrpos * sizeof (short) + 1314 nsect / (MAXFRAG * NBBY)); 1315 } else { 1316 maxcpg = compute_maxcpg(bsize, fragsize, nbpi, nrpos, 1317 nsect * ntrack); 1318 } 1319 1320 dprintf(("DeBuG cpg : %ld\n", cpg)); 1321 if (cpg == -1) 1322 cpg = maxcpg; 1323 dprintf(("DeBuG cpg : %ld\n", cpg)); 1324 1325 /* 1326 * mincpg is variable in complex ways, so we really can't 1327 * do a sane lower-end limit check at this point. 
1328 */ 1329 range_check(&cpg, "cgsize", 1, maxcpg, MIN(maxcpg, DESCPG), cpg_flag); 1330 1331 /* 1332 * get the controller info 1333 */ 1334 ismdd = 0; 1335 islog = 0; 1336 islogok = 0; 1337 waslog = 0; 1338 1339 if (ioctl(fsi, DKIOCINFO, &dkcinfo) == 0) 1340 /* 1341 * if it is an MDD (disksuite) device 1342 */ 1343 if (dkcinfo.dki_ctype == DKC_MD) { 1344 ismdd++; 1345 /* 1346 * check the logging device 1347 */ 1348 if (ioctl(fsi, _FIOISLOG, NULL) == 0) { 1349 islog++; 1350 if (ioctl(fsi, _FIOISLOGOK, NULL) == 0) 1351 islogok++; 1352 } 1353 } 1354 1355 /* 1356 * Do not grow the file system, but print on stdout the maximum 1357 * size in sectors to which the file system can be increased. 1358 * The calculated size is limited by fssize_db. 1359 * Note that we don't lock the filesystem and therefore under rare 1360 * conditions (the filesystem is mounted, the free block count is 1361 * almost zero, and the superuser is still changing it) the calculated 1362 * size can be imprecise. 1363 */ 1364 if (Pflag) { 1365 (void) printf("%llu\n", probe_summaryinfo()); 1366 exit(0); 1367 } 1368 1369 /* 1370 * If we're growing an existing filesystem, then we're about 1371 * to start doing things that can require recovery efforts if 1372 * we get interrupted, so make sure we get a chance to do so. 
1373 */ 1374 if (grow) { 1375 sigact.sa_handler = recover_from_sigint; 1376 sigemptyset(&sigact.sa_mask); 1377 sigact.sa_flags = SA_RESTART; 1378 1379 if (sigaction(SIGINT, &sigact, (struct sigaction *)NULL) < 0) { 1380 perror(gettext("Could not register SIGINT handler")); 1381 lockexit(3); 1382 } 1383 } 1384 1385 if (!Nflag) { 1386 /* 1387 * Check if MNTTAB is trustable 1388 */ 1389 if (statvfs64(MNTTAB, &fs) < 0) { 1390 (void) fprintf(stderr, gettext("can't statvfs %s\n"), 1391 MNTTAB); 1392 exit(32); 1393 } 1394 1395 if (strcmp(MNTTYPE_MNTFS, fs.f_basetype) != 0) { 1396 (void) fprintf(stderr, gettext( 1397 "%s file system type is not %s, can't mkfs\n"), 1398 MNTTAB, MNTTYPE_MNTFS); 1399 exit(32); 1400 } 1401 1402 special = getfullblkname(fsys); 1403 checkdev(fsys, special); 1404 1405 /* 1406 * If we found the block device name, 1407 * then check the mount table. 1408 * if mounted, and growing write lock the file system 1409 * 1410 */ 1411 if ((special != NULL) && (*special != '\0')) { 1412 if ((mnttab = fopen(MNTTAB, "r")) == NULL) { 1413 (void) fprintf(stderr, gettext( 1414 "can't open %s\n"), MNTTAB); 1415 exit(32); 1416 } 1417 while ((getmntent(mnttab, &mntp)) == NULL) { 1418 if (grow) { 1419 checkmount(&mntp, special); 1420 continue; 1421 } 1422 if (strcmp(special, mntp.mnt_special) == 0) { 1423 (void) fprintf(stderr, gettext( 1424 "%s is mounted, can't mkfs\n"), 1425 special); 1426 exit(32); 1427 } 1428 } 1429 (void) fclose(mnttab); 1430 } 1431 1432 if (directory && (ismounted == 0)) { 1433 (void) fprintf(stderr, gettext("%s is not mounted\n"), 1434 special); 1435 lockexit(32); 1436 } 1437 1438 fso = (grow) ? open64(fsys, O_WRONLY) : creat64(fsys, 0666); 1439 if (fso < 0) { 1440 saverr = errno; 1441 (void) fprintf(stderr, 1442 gettext("%s: cannot create: %s\n"), 1443 fsys, strerror(saverr)); 1444 lockexit(32); 1445 } 1446 1447 } else { 1448 1449 /* 1450 * For the -N case, a file descriptor is needed for the llseek() 1451 * in wtfs(). 
See the comment in wtfs() for more information. 1452 * 1453 * Get a file descriptor that's read-only so that this code 1454 * doesn't accidentally write to the file. 1455 */ 1456 fso = open64(fsys, O_RDONLY); 1457 if (fso < 0) { 1458 saverr = errno; 1459 (void) fprintf(stderr, gettext("%s: cannot open: %s\n"), 1460 fsys, strerror(saverr)); 1461 lockexit(32); 1462 } 1463 } 1464 1465 /* 1466 * seed random # generator (for ic_generation) 1467 */ 1468 #ifdef MKFS_DEBUG 1469 srand48(12962); /* reproducible results */ 1470 #else 1471 srand48((long)(time((time_t *)NULL) + getpid())); 1472 #endif 1473 1474 if (grow) { 1475 growinit(fsys); 1476 goto grow00; 1477 } 1478 1479 /* 1480 * Validate the given file system size. 1481 * Verify that its last block can actually be accessed. 1482 * 1483 * Note: it's ok to use sblock as a buffer because it is immediately 1484 * overwritten by the rdfs() of the superblock in the next line. 1485 * 1486 * ToDo: Because the size checking is done in rdfs()/wtfs(), the 1487 * error message for specifying an illegal size is very unfriendly. 1488 * In the future, one could replace the rdfs()/wtfs() calls 1489 * below with in-line calls to read() or write(). This allows better 1490 * error messages to be put in place. 1491 */ 1492 rdfs(fssize_db - 1, (int)sectorsize, (char *)&sblock); 1493 1494 /* 1495 * make the fs unmountable 1496 */ 1497 rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock); 1498 sblock.fs_magic = -1; 1499 sblock.fs_clean = FSBAD; 1500 sblock.fs_state = FSOKAY - sblock.fs_time; 1501 wtfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock); 1502 bzero(&sblock, (size_t)sbsize); 1503 1504 sblock.fs_nsect = nsect; 1505 sblock.fs_ntrak = ntrack; 1506 1507 /* 1508 * Validate specified/determined spc 1509 * and calculate minimum cylinders per group. 
1510 */ 1511 1512 /* 1513 * sectors/cyl = tracks/cyl * sectors/track 1514 */ 1515 sblock.fs_spc = sblock.fs_ntrak * sblock.fs_nsect; 1516 1517 grow00: 1518 if (apc_flag) { 1519 sblock.fs_spc -= apc; 1520 } 1521 /* 1522 * Have to test for this separately from apc_flag, due to 1523 * the growfs case.... 1524 */ 1525 if (sblock.fs_spc != sblock.fs_ntrak * sblock.fs_nsect) { 1526 spc_flag = 1; 1527 } 1528 if (grow) 1529 goto grow10; 1530 1531 sblock.fs_nrpos = nrpos; 1532 sblock.fs_bsize = bsize; 1533 sblock.fs_fsize = fragsize; 1534 sblock.fs_minfree = minfree; 1535 1536 grow10: 1537 if (nbpi < sblock.fs_fsize) { 1538 (void) fprintf(stderr, gettext( 1539 "warning: wasteful data byte allocation / inode (nbpi):\n")); 1540 (void) fprintf(stderr, gettext( 1541 "%ld smaller than allocatable fragment size of %d\n"), 1542 nbpi, sblock.fs_fsize); 1543 } 1544 if (grow) 1545 goto grow20; 1546 1547 if (opt == 's') 1548 sblock.fs_optim = FS_OPTSPACE; 1549 else 1550 sblock.fs_optim = FS_OPTTIME; 1551 1552 sblock.fs_bmask = ~(sblock.fs_bsize - 1); 1553 sblock.fs_fmask = ~(sblock.fs_fsize - 1); 1554 /* 1555 * Planning now for future expansion. 
1556 */ 1557 #if defined(_BIG_ENDIAN) 1558 sblock.fs_qbmask.val[0] = 0; 1559 sblock.fs_qbmask.val[1] = ~sblock.fs_bmask; 1560 sblock.fs_qfmask.val[0] = 0; 1561 sblock.fs_qfmask.val[1] = ~sblock.fs_fmask; 1562 #endif 1563 #if defined(_LITTLE_ENDIAN) 1564 sblock.fs_qbmask.val[0] = ~sblock.fs_bmask; 1565 sblock.fs_qbmask.val[1] = 0; 1566 sblock.fs_qfmask.val[0] = ~sblock.fs_fmask; 1567 sblock.fs_qfmask.val[1] = 0; 1568 #endif 1569 for (sblock.fs_bshift = 0, i = sblock.fs_bsize; i > 1; i >>= 1) 1570 sblock.fs_bshift++; 1571 for (sblock.fs_fshift = 0, i = sblock.fs_fsize; i > 1; i >>= 1) 1572 sblock.fs_fshift++; 1573 sblock.fs_frag = numfrags(&sblock, sblock.fs_bsize); 1574 for (sblock.fs_fragshift = 0, i = sblock.fs_frag; i > 1; i >>= 1) 1575 sblock.fs_fragshift++; 1576 if (sblock.fs_frag > MAXFRAG) { 1577 (void) fprintf(stderr, gettext( 1578 "fragment size %d is too small, minimum with block size %d is %d\n"), 1579 sblock.fs_fsize, sblock.fs_bsize, 1580 sblock.fs_bsize / MAXFRAG); 1581 lockexit(32); 1582 } 1583 sblock.fs_nindir = sblock.fs_bsize / sizeof (daddr32_t); 1584 sblock.fs_inopb = sblock.fs_bsize / sizeof (struct dinode); 1585 sblock.fs_nspf = sblock.fs_fsize / sectorsize; 1586 for (sblock.fs_fsbtodb = 0, i = NSPF(&sblock); i > 1; i >>= 1) 1587 sblock.fs_fsbtodb++; 1588 1589 /* 1590 * Compute the super-block, cylinder group, and inode blocks. 1591 * Note that these "blkno" are really fragment addresses. 1592 * For example, on an 8K/1K (block/fragment) system, fs_sblkno is 16, 1593 * fs_cblkno is 24, and fs_iblkno is 32. This is why CGSIZE is so 1594 * important: only 1 FS block is allocated for the cg struct (fragment 1595 * numbers 24 through 31). 
1596 */ 1597 sblock.fs_sblkno = 1598 roundup(howmany(bbsize + sbsize, sblock.fs_fsize), sblock.fs_frag); 1599 sblock.fs_cblkno = (daddr32_t)(sblock.fs_sblkno + 1600 roundup(howmany(sbsize, sblock.fs_fsize), sblock.fs_frag)); 1601 sblock.fs_iblkno = sblock.fs_cblkno + sblock.fs_frag; 1602 1603 sblock.fs_cgoffset = roundup( 1604 howmany(sblock.fs_nsect, NSPF(&sblock)), sblock.fs_frag); 1605 for (sblock.fs_cgmask = -1, i = sblock.fs_ntrak; i > 1; i >>= 1) 1606 sblock.fs_cgmask <<= 1; 1607 if (!POWEROF2(sblock.fs_ntrak)) 1608 sblock.fs_cgmask <<= 1; 1609 /* 1610 * Validate specified/determined spc 1611 * and calculate minimum cylinders per group. 1612 */ 1613 1614 for (sblock.fs_cpc = NSPB(&sblock), i = sblock.fs_spc; 1615 sblock.fs_cpc > 1 && (i & 1) == 0; 1616 sblock.fs_cpc >>= 1, i >>= 1) 1617 /* void */; 1618 mincpc = sblock.fs_cpc; 1619 1620 /* if these calculations are changed, check dump_fscmd also */ 1621 bpcg = (uint64_t)sblock.fs_spc * sectorsize; 1622 inospercg = (uint64_t)roundup(bpcg / sizeof (struct dinode), 1623 INOPB(&sblock)); 1624 if (inospercg > MAXIpG(&sblock)) 1625 inospercg = MAXIpG(&sblock); 1626 used = (uint64_t)(sblock.fs_iblkno + inospercg / 1627 INOPF(&sblock)) * NSPF(&sblock); 1628 mincpgcnt = (long)howmany((uint64_t)sblock.fs_cgoffset * 1629 (~sblock.fs_cgmask) + used, sblock.fs_spc); 1630 mincpg = roundup(mincpgcnt, mincpc); 1631 /* 1632 * Insure that cylinder group with mincpg has enough space 1633 * for block maps 1634 */ 1635 sblock.fs_cpg = mincpg; 1636 sblock.fs_ipg = (int32_t)inospercg; 1637 mapcramped = 0; 1638 1639 /* 1640 * Make sure the cg struct fits within the file system block. 
1641 * Use larger block sizes until it fits 1642 */ 1643 while (CGSIZE(&sblock) > sblock.fs_bsize) { 1644 mapcramped = 1; 1645 if (sblock.fs_bsize < MAXBSIZE) { 1646 sblock.fs_bsize <<= 1; 1647 if ((i & 1) == 0) { 1648 i >>= 1; 1649 } else { 1650 sblock.fs_cpc <<= 1; 1651 mincpc <<= 1; 1652 mincpg = roundup(mincpgcnt, mincpc); 1653 sblock.fs_cpg = mincpg; 1654 } 1655 sblock.fs_frag <<= 1; 1656 sblock.fs_fragshift += 1; 1657 if (sblock.fs_frag <= MAXFRAG) 1658 continue; 1659 } 1660 1661 /* 1662 * Looped far enough. The fragment is now as large as the 1663 * filesystem block! 1664 */ 1665 if (sblock.fs_fsize == sblock.fs_bsize) { 1666 (void) fprintf(stderr, gettext( 1667 "There is no block size that can support this disk\n")); 1668 lockexit(32); 1669 } 1670 1671 /* 1672 * Try a larger fragment. Double the fragment size. 1673 */ 1674 sblock.fs_frag >>= 1; 1675 sblock.fs_fragshift -= 1; 1676 sblock.fs_fsize <<= 1; 1677 sblock.fs_nspf <<= 1; 1678 } 1679 /* 1680 * Insure that cylinder group with mincpg has enough space for inodes 1681 */ 1682 inodecramped = 0; 1683 used *= sectorsize; 1684 nbytes64 = (uint64_t)mincpg * bpcg - used; 1685 inospercg = (uint64_t)roundup((nbytes64 / nbpi), INOPB(&sblock)); 1686 sblock.fs_ipg = (int32_t)inospercg; 1687 while (inospercg > MAXIpG(&sblock)) { 1688 inodecramped = 1; 1689 if (mincpc == 1 || sblock.fs_frag == 1 || 1690 sblock.fs_bsize == MINBSIZE) 1691 break; 1692 nbytes64 = (uint64_t)mincpg * bpcg - used; 1693 (void) fprintf(stderr, 1694 gettext("With a block size of %d %s %lu\n"), 1695 sblock.fs_bsize, gettext("minimum bytes per inode is"), 1696 (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1)); 1697 sblock.fs_bsize >>= 1; 1698 sblock.fs_frag >>= 1; 1699 sblock.fs_fragshift -= 1; 1700 mincpc >>= 1; 1701 sblock.fs_cpg = roundup(mincpgcnt, mincpc); 1702 if (CGSIZE(&sblock) > sblock.fs_bsize) { 1703 sblock.fs_bsize <<= 1; 1704 break; 1705 } 1706 mincpg = sblock.fs_cpg; 1707 nbytes64 = (uint64_t)mincpg * bpcg - used; 1708 inospercg = 
(uint64_t)roundup((nbytes64 / nbpi), 1709 INOPB(&sblock)); 1710 sblock.fs_ipg = (int32_t)inospercg; 1711 } 1712 if (inodecramped) { 1713 if (inospercg > MAXIpG(&sblock)) { 1714 nbytes64 = (uint64_t)mincpg * bpcg - used; 1715 (void) fprintf(stderr, gettext( 1716 "Minimum bytes per inode is %d\n"), 1717 (uint32_t)(nbytes64 / MAXIpG(&sblock) + 1)); 1718 } else if (!mapcramped) { 1719 (void) fprintf(stderr, gettext( 1720 "With %ld bytes per inode, minimum cylinders per group is %ld\n"), 1721 nbpi, mincpg); 1722 } 1723 } 1724 if (mapcramped) { 1725 (void) fprintf(stderr, gettext( 1726 "With %d sectors per cylinder, minimum cylinders " 1727 "per group is %ld\n"), 1728 sblock.fs_spc, mincpg); 1729 } 1730 if (inodecramped || mapcramped) { 1731 /* 1732 * To make this at least somewhat comprehensible in 1733 * the world of i18n, figure out what we're going to 1734 * say and then say it all at one time. The days of 1735 * needing to scrimp on string space are behind us.... 1736 */ 1737 if ((sblock.fs_bsize != bsize) && 1738 (sblock.fs_fsize != fragsize)) { 1739 (void) fprintf(stderr, gettext( 1740 "This requires the block size to be changed from %ld to %d\n" 1741 "and the fragment size to be changed from %ld to %d\n"), 1742 bsize, sblock.fs_bsize, 1743 fragsize, sblock.fs_fsize); 1744 } else if (sblock.fs_bsize != bsize) { 1745 (void) fprintf(stderr, gettext( 1746 "This requires the block size to be changed from %ld to %d\n"), 1747 bsize, sblock.fs_bsize); 1748 } else if (sblock.fs_fsize != fragsize) { 1749 (void) fprintf(stderr, gettext( 1750 "This requires the fragment size to be changed from %ld to %d\n"), 1751 fragsize, sblock.fs_fsize); 1752 } else { 1753 (void) fprintf(stderr, gettext( 1754 "Unable to make filesystem fit with the given constraints\n")); 1755 } 1756 (void) fprintf(stderr, gettext( 1757 "Please re-run mkfs with corrected parameters\n")); 1758 lockexit(32); 1759 } 1760 /* 1761 * Calculate the number of cylinders per group 1762 */ 1763 sblock.fs_cpg = cpg; 
1764 if (sblock.fs_cpg % mincpc != 0) { 1765 (void) fprintf(stderr, gettext( 1766 "Warning: cylinder groups must have a multiple " 1767 "of %ld cylinders with the given\n parameters\n"), 1768 mincpc); 1769 sblock.fs_cpg = roundup(sblock.fs_cpg, mincpc); 1770 (void) fprintf(stderr, gettext("Rounded cgsize up to %d\n"), 1771 sblock.fs_cpg); 1772 } 1773 /* 1774 * Must insure there is enough space for inodes 1775 */ 1776 /* if these calculations are changed, check dump_fscmd also */ 1777 nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used; 1778 sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi), INOPB(&sblock)); 1779 1780 /* 1781 * Slim down cylinders per group, until the inodes can fit. 1782 */ 1783 while (sblock.fs_ipg > MAXIpG(&sblock)) { 1784 inodecramped = 1; 1785 sblock.fs_cpg -= mincpc; 1786 nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used; 1787 sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi), 1788 INOPB(&sblock)); 1789 } 1790 /* 1791 * Must insure there is enough space to hold block map. 1792 * Cut down on cylinders per group, until the cg struct fits in a 1793 * filesystem block. 1794 */ 1795 while (CGSIZE(&sblock) > sblock.fs_bsize) { 1796 mapcramped = 1; 1797 sblock.fs_cpg -= mincpc; 1798 nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used; 1799 sblock.fs_ipg = roundup((uint32_t)(nbytes64 / nbpi), 1800 INOPB(&sblock)); 1801 } 1802 sblock.fs_fpg = (sblock.fs_cpg * sblock.fs_spc) / NSPF(&sblock); 1803 if ((sblock.fs_cpg * sblock.fs_spc) % NSPB(&sblock) != 0) { 1804 (void) fprintf(stderr, 1805 gettext("newfs: panic (fs_cpg * fs_spc) %% NSPF != 0\n")); 1806 lockexit(32); 1807 } 1808 if (sblock.fs_cpg < mincpg) { 1809 (void) fprintf(stderr, gettext( 1810 "With the given parameters, cgsize must be at least %ld; please re-run mkfs\n"), 1811 mincpg); 1812 lockexit(32); 1813 } 1814 sblock.fs_cgsize = fragroundup(&sblock, CGSIZE(&sblock)); 1815 grow20: 1816 /* 1817 * Now have size for file system and nsect and ntrak. 
1818 * Determine number of cylinders and blocks in the file system. 1819 */ 1820 fssize_frag = (int64_t)dbtofsb(&sblock, fssize_db); 1821 if (fssize_frag > INT_MAX) { 1822 (void) fprintf(stderr, gettext( 1823 "There are too many fragments in the system, increase fragment size\n"), 1824 mincpg); 1825 lockexit(32); 1826 } 1827 sblock.fs_size = (int32_t)fssize_frag; 1828 sblock.fs_ncyl = (int32_t)(fssize_frag * NSPF(&sblock) / sblock.fs_spc); 1829 if (fssize_frag * NSPF(&sblock) > 1830 (uint64_t)sblock.fs_ncyl * sblock.fs_spc) { 1831 sblock.fs_ncyl++; 1832 warn = 1; 1833 } 1834 if (sblock.fs_ncyl < 1) { 1835 (void) fprintf(stderr, gettext( 1836 "file systems must have at least one cylinder\n")); 1837 lockexit(32); 1838 } 1839 if (grow) 1840 goto grow30; 1841 /* 1842 * Determine feasability/values of rotational layout tables. 1843 * 1844 * The size of the rotational layout tables is limited by the size 1845 * of the file system block, fs_bsize. The amount of space 1846 * available for tables is calculated as (fs_bsize - sizeof (struct 1847 * fs)). The size of these tables is inversely proportional to the 1848 * block size of the file system. The size increases if sectors per 1849 * track are not powers of two, because more cylinders must be 1850 * described by the tables before the rotational pattern repeats 1851 * (fs_cpc). 
1852 */ 1853 sblock.fs_postblformat = FS_DYNAMICPOSTBLFMT; 1854 sblock.fs_sbsize = fragroundup(&sblock, sizeof (struct fs)); 1855 sblock.fs_npsect = sblock.fs_nsect; 1856 if (sblock.fs_ntrak == 1) { 1857 sblock.fs_cpc = 0; 1858 goto next; 1859 } 1860 postblsize = sblock.fs_nrpos * sblock.fs_cpc * sizeof (short); 1861 rotblsize = sblock.fs_cpc * sblock.fs_spc / NSPB(&sblock); 1862 totalsbsize = sizeof (struct fs) + rotblsize; 1863 1864 /* do static allocation if nrpos == 8 and fs_cpc == 16 */ 1865 if (sblock.fs_nrpos == 8 && sblock.fs_cpc <= 16) { 1866 /* use old static table space */ 1867 sblock.fs_postbloff = (char *)(&sblock.fs_opostbl[0][0]) - 1868 (char *)(&sblock.fs_link); 1869 sblock.fs_rotbloff = &sblock.fs_space[0] - 1870 (uchar_t *)(&sblock.fs_link); 1871 } else { 1872 /* use 4.3 dynamic table space */ 1873 sblock.fs_postbloff = &sblock.fs_space[0] - 1874 (uchar_t *)(&sblock.fs_link); 1875 sblock.fs_rotbloff = sblock.fs_postbloff + postblsize; 1876 totalsbsize += postblsize; 1877 } 1878 if (totalsbsize > sblock.fs_bsize || 1879 sblock.fs_nsect > (1 << NBBY) * NSPB(&sblock)) { 1880 (void) fprintf(stderr, gettext( 1881 "Warning: insufficient space in super block for\n" 1882 "rotational layout tables with nsect %d, ntrack %d, " 1883 "and nrpos %d.\nOmitting tables - file system " 1884 "performance may be impaired.\n"), 1885 sblock.fs_nsect, sblock.fs_ntrak, sblock.fs_nrpos); 1886 1887 /* 1888 * Setting fs_cpc to 0 tells alloccgblk() in ufs_alloc.c to 1889 * ignore the positional layout table and rotational 1890 * position table. 
1891 */ 1892 sblock.fs_cpc = 0; 1893 goto next; 1894 } 1895 sblock.fs_sbsize = fragroundup(&sblock, totalsbsize); 1896 1897 1898 /* 1899 * calculate the available blocks for each rotational position 1900 */ 1901 for (cylno = 0; cylno < sblock.fs_cpc; cylno++) 1902 for (rpos = 0; rpos < sblock.fs_nrpos; rpos++) 1903 fs_postbl(&sblock, cylno)[rpos] = -1; 1904 for (i = (rotblsize - 1) * sblock.fs_frag; 1905 i >= 0; i -= sblock.fs_frag) { 1906 cylno = cbtocylno(&sblock, i); 1907 rpos = cbtorpos(&sblock, i); 1908 blk = fragstoblks(&sblock, i); 1909 if (fs_postbl(&sblock, cylno)[rpos] == -1) 1910 fs_rotbl(&sblock)[blk] = 0; 1911 else 1912 fs_rotbl(&sblock)[blk] = 1913 fs_postbl(&sblock, cylno)[rpos] - blk; 1914 fs_postbl(&sblock, cylno)[rpos] = blk; 1915 } 1916 next: 1917 grow30: 1918 /* 1919 * Compute/validate number of cylinder groups. 1920 * Note that if an excessively large filesystem is specified 1921 * (e.g., more than 16384 cylinders for an 8K filesystem block), it 1922 * does not get detected until checksummarysize() 1923 */ 1924 sblock.fs_ncg = sblock.fs_ncyl / sblock.fs_cpg; 1925 if (sblock.fs_ncyl % sblock.fs_cpg) 1926 sblock.fs_ncg++; 1927 sblock.fs_dblkno = sblock.fs_iblkno + sblock.fs_ipg / INOPF(&sblock); 1928 i = MIN(~sblock.fs_cgmask, sblock.fs_ncg - 1); 1929 ibpcl = cgdmin(&sblock, i) - cgbase(&sblock, i); 1930 if (ibpcl >= sblock.fs_fpg) { 1931 (void) fprintf(stderr, gettext( 1932 "inode blocks/cyl group (%d) >= data blocks (%d)\n"), 1933 cgdmin(&sblock, i) - cgbase(&sblock, i) / sblock.fs_frag, 1934 sblock.fs_fpg / sblock.fs_frag); 1935 if ((ibpcl < 0) || (sblock.fs_fpg < 0)) { 1936 (void) fprintf(stderr, gettext( 1937 "number of cylinders per cylinder group (%d) must be decreased.\n"), 1938 sblock.fs_cpg); 1939 } else { 1940 (void) fprintf(stderr, gettext( 1941 "number of cylinders per cylinder group (%d) must be increased.\n"), 1942 sblock.fs_cpg); 1943 } 1944 (void) fprintf(stderr, gettext( 1945 "Note that cgsize may have been adjusted to allow 
struct cg to fit.\n")); 1946 lockexit(32); 1947 } 1948 j = sblock.fs_ncg - 1; 1949 if ((i = fssize_frag - j * sblock.fs_fpg) < sblock.fs_fpg && 1950 cgdmin(&sblock, j) - cgbase(&sblock, j) > i) { 1951 (void) fprintf(stderr, gettext( 1952 "Warning: inode blocks/cyl group (%d) >= data " 1953 "blocks (%ld) in last\n cylinder group. This " 1954 "implies %ld sector(s) cannot be allocated.\n"), 1955 (cgdmin(&sblock, j) - cgbase(&sblock, j)) / sblock.fs_frag, 1956 i / sblock.fs_frag, i * NSPF(&sblock)); 1957 /* 1958 * If there is only one cylinder group and that is not even 1959 * big enough to hold the inodes, exit. 1960 */ 1961 if (sblock.fs_ncg == 1) 1962 cg_too_small = 1; 1963 sblock.fs_ncg--; 1964 sblock.fs_ncyl -= sblock.fs_ncyl % sblock.fs_cpg; 1965 sblock.fs_size = fssize_frag = 1966 (int64_t)sblock.fs_ncyl * (int64_t)sblock.fs_spc / 1967 (int64_t)NSPF(&sblock); 1968 warn = 0; 1969 } 1970 if (warn && !spc_flag) { 1971 (void) fprintf(stderr, gettext( 1972 "Warning: %d sector(s) in last cylinder unallocated\n"), 1973 sblock.fs_spc - (uint32_t)(fssize_frag * NSPF(&sblock) - 1974 (uint64_t)(sblock.fs_ncyl - 1) * sblock.fs_spc)); 1975 } 1976 /* 1977 * fill in remaining fields of the super block 1978 */ 1979 1980 /* 1981 * The csum records are stored in cylinder group 0, starting at 1982 * cgdmin, the first data block. 
1983 */ 1984 sblock.fs_csaddr = cgdmin(&sblock, 0); 1985 sblock.fs_cssize = 1986 fragroundup(&sblock, sblock.fs_ncg * sizeof (struct csum)); 1987 i = sblock.fs_bsize / sizeof (struct csum); 1988 sblock.fs_csmask = ~(i - 1); 1989 for (sblock.fs_csshift = 0; i > 1; i >>= 1) 1990 sblock.fs_csshift++; 1991 fscs = (struct csum *)calloc(1, sblock.fs_cssize); 1992 1993 checksummarysize(); 1994 if (mtb == 'y') { 1995 sblock.fs_magic = MTB_UFS_MAGIC; 1996 sblock.fs_version = MTB_UFS_VERSION_1; 1997 } else { 1998 sblock.fs_magic = FS_MAGIC; 1999 if (use_efi_dflts) 2000 sblock.fs_version = UFS_EFISTYLE4NONEFI_VERSION_2; 2001 else 2002 sblock.fs_version = UFS_VERSION_MIN; 2003 } 2004 2005 if (grow) { 2006 bcopy((caddr_t)grow_fscs, (caddr_t)fscs, (int)grow_fs_cssize); 2007 extendsummaryinfo(); 2008 goto grow40; 2009 } 2010 sblock.fs_rotdelay = rotdelay; 2011 sblock.fs_maxcontig = maxcontig; 2012 sblock.fs_maxbpg = MAXBLKPG(sblock.fs_bsize); 2013 2014 sblock.fs_rps = rps; 2015 sblock.fs_cgrotor = 0; 2016 sblock.fs_cstotal.cs_ndir = 0; 2017 sblock.fs_cstotal.cs_nbfree = 0; 2018 sblock.fs_cstotal.cs_nifree = 0; 2019 sblock.fs_cstotal.cs_nffree = 0; 2020 sblock.fs_fmod = 0; 2021 sblock.fs_ronly = 0; 2022 sblock.fs_time = mkfstime; 2023 sblock.fs_state = FSOKAY - sblock.fs_time; 2024 sblock.fs_clean = FSCLEAN; 2025 grow40: 2026 2027 /* 2028 * If all that's needed is a dump of the superblock we 2029 * would use by default, we've got it now. So, splat it 2030 * out and leave. 2031 */ 2032 if (rflag) { 2033 dump_sblock(); 2034 lockexit(0); 2035 } 2036 /* 2037 * Dump out summary information about file system. 
2038 */ 2039 (void) fprintf(stderr, gettext( 2040 "%s:\t%lld sectors in %d cylinders of %d tracks, %d sectors\n"), 2041 fsys, (uint64_t)sblock.fs_size * NSPF(&sblock), sblock.fs_ncyl, 2042 sblock.fs_ntrak, sblock.fs_nsect); 2043 (void) fprintf(stderr, gettext( 2044 "\t%.1fMB in %d cyl groups (%d c/g, %.2fMB/g, %d i/g)\n"), 2045 (float)sblock.fs_size * sblock.fs_fsize / MB, sblock.fs_ncg, 2046 sblock.fs_cpg, (float)sblock.fs_fpg * sblock.fs_fsize / MB, 2047 sblock.fs_ipg); 2048 2049 tmpbuf = calloc(sblock.fs_ncg / 50 + 500, 1); 2050 if (tmpbuf == NULL) { 2051 perror("calloc"); 2052 lockexit(32); 2053 } 2054 if (cg_too_small) { 2055 (void) fprintf(stderr, gettext("File system creation failed. " 2056 "There is only one cylinder group and\nthat is " 2057 "not even big enough to hold the inodes.\n")); 2058 lockexit(32); 2059 } 2060 /* 2061 * Now build the cylinders group blocks and 2062 * then print out indices of cylinder groups. 2063 */ 2064 tprintf(gettext( 2065 "super-block backups (for fsck -F ufs -o b=#) at:\n")); 2066 for (width = cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++) { 2067 if ((grow == 0) || (cylno >= grow_fs_ncg)) 2068 initcg(cylno); 2069 num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)); 2070 /* 2071 * If Nflag and if the disk is larger than the CHSLIMIT, 2072 * then sanity test the superblocks before reporting. If there 2073 * are too many superblocks which look insane, we have 2074 * to retry with alternate logic. If both methods have 2075 * failed, then our efforts to arrive at alternate 2076 * superblocks failed, so complain and exit. 
2077 */ 2078 if (Nflag && retry) { 2079 skip_this_sb = 0; 2080 rdfs((diskaddr_t)num, sbsize, (char *)&altsblock); 2081 ret = checksblock(altsblock, 1); 2082 if (ret) { 2083 skip_this_sb = 1; 2084 invalid_sb_cnt++; 2085 dprintf(("DeBuG checksblock() failed - error : %d" 2086 " for sb : %llu invalid_sb_cnt : %d\n", 2087 ret, num, invalid_sb_cnt)); 2088 } else { 2089 /* 2090 * Though the superblock looks sane, verify if the 2091 * fs_version in the superblock and the logic that 2092 * we are using to arrive at the superblocks match. 2093 */ 2094 if (use_efi_dflts && altsblock.fs_version 2095 != UFS_EFISTYLE4NONEFI_VERSION_2) { 2096 skip_this_sb = 1; 2097 invalid_sb_cnt++; 2098 } 2099 } 2100 if (invalid_sb_cnt >= INVALIDSBLIMIT) { 2101 if (retry > 1) { 2102 (void) fprintf(stderr, gettext( 2103 "Error determining alternate " 2104 "superblock locations\n")); 2105 free(tmpbuf); 2106 lockexit(32); 2107 } 2108 retry++; 2109 use_efi_dflts = !use_efi_dflts; 2110 free(tmpbuf); 2111 goto retry_alternate_logic; 2112 } 2113 if (skip_this_sb) 2114 continue; 2115 } 2116 (void) sprintf(pbuf, " %llu,", num); 2117 plen = strlen(pbuf); 2118 if ((width + plen) > (WIDTH - 1)) { 2119 width = plen; 2120 tprintf("\n"); 2121 } else { 2122 width += plen; 2123 } 2124 if (Nflag && retry) 2125 (void) strncat(tmpbuf, pbuf, strlen(pbuf)); 2126 else 2127 (void) fprintf(stderr, "%s", pbuf); 2128 } 2129 tprintf("\n"); 2130 2131 remaining_cg = sblock.fs_ncg - cylno; 2132 2133 /* 2134 * If there are more than 300 cylinder groups still to be 2135 * initialized, print a "." for every 50 cylinder groups. 2136 */ 2137 if (remaining_cg > 300) { 2138 tprintf(gettext("Initializing cylinder groups:\n")); 2139 do_dot = 1; 2140 } 2141 2142 /* 2143 * Now initialize all cylinder groups between the first ten 2144 * and the last ten. 
2145 * 2146 * If the number of cylinder groups was less than 10, all of the 2147 * cylinder group offsets would have printed in the last loop 2148 * and cylno will already be equal to sblock.fs_ncg and so this 2149 * loop will not be entered. If there are less than 20 cylinder 2150 * groups, cylno is already less than fs_ncg - 10, so this loop 2151 * won't be entered in that case either. 2152 */ 2153 2154 i = 0; 2155 for (; cylno < sblock.fs_ncg - 10; cylno++) { 2156 if ((grow == 0) || (cylno >= grow_fs_ncg)) 2157 initcg(cylno); 2158 if (do_dot && cylno % 50 == 0) { 2159 tprintf("."); 2160 i++; 2161 if (i == WIDTH - 1) { 2162 tprintf("\n"); 2163 i = 0; 2164 } 2165 } 2166 } 2167 2168 /* 2169 * Now print the cylinder group offsets for the last 10 2170 * cylinder groups, if any are left. 2171 */ 2172 2173 if (do_dot) { 2174 tprintf(gettext( 2175 "\nsuper-block backups for last 10 cylinder groups at:\n")); 2176 } 2177 for (width = 0; cylno < sblock.fs_ncg; cylno++) { 2178 if ((grow == 0) || (cylno >= grow_fs_ncg)) 2179 initcg(cylno); 2180 num = fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)); 2181 if (Nflag && retry) { 2182 skip_this_sb = 0; 2183 rdfs((diskaddr_t)num, sbsize, (char *)&altsblock); 2184 ret = checksblock(altsblock, 1); 2185 if (ret) { 2186 skip_this_sb = 1; 2187 invalid_sb_cnt++; 2188 dprintf(("DeBuG checksblock() failed - error : %d" 2189 " for sb : %llu invalid_sb_cnt : %d\n", 2190 ret, num, invalid_sb_cnt)); 2191 } else { 2192 /* 2193 * Though the superblock looks sane, verify if the 2194 * fs_version in the superblock and the logic that 2195 * we are using to arrive at the superblocks match. 
2196 */ 2197 if (use_efi_dflts && altsblock.fs_version 2198 != UFS_EFISTYLE4NONEFI_VERSION_2) { 2199 skip_this_sb = 1; 2200 invalid_sb_cnt++; 2201 } 2202 } 2203 if (invalid_sb_cnt >= INVALIDSBLIMIT) { 2204 if (retry > 1) { 2205 (void) fprintf(stderr, gettext( 2206 "Error determining alternate " 2207 "superblock locations\n")); 2208 free(tmpbuf); 2209 lockexit(32); 2210 } 2211 retry++; 2212 use_efi_dflts = !use_efi_dflts; 2213 free(tmpbuf); 2214 goto retry_alternate_logic; 2215 } 2216 if (skip_this_sb) 2217 continue; 2218 } 2219 /* Don't print ',' for the last superblock */ 2220 if (cylno == sblock.fs_ncg-1) 2221 (void) sprintf(pbuf, " %llu", num); 2222 else 2223 (void) sprintf(pbuf, " %llu,", num); 2224 plen = strlen(pbuf); 2225 if ((width + plen) > (WIDTH - 1)) { 2226 width = plen; 2227 tprintf("\n"); 2228 } else { 2229 width += plen; 2230 } 2231 if (Nflag && retry) 2232 (void) strncat(tmpbuf, pbuf, strlen(pbuf)); 2233 else 2234 (void) fprintf(stderr, "%s", pbuf); 2235 } 2236 tprintf("\n"); 2237 if (Nflag) { 2238 if (retry) 2239 (void) fprintf(stderr, "%s", tmpbuf); 2240 free(tmpbuf); 2241 lockexit(0); 2242 } 2243 2244 free(tmpbuf); 2245 if (grow) 2246 goto grow50; 2247 2248 /* 2249 * Now construct the initial file system, 2250 * then write out the super-block. 2251 */ 2252 fsinit(); 2253 grow50: 2254 /* 2255 * write the superblock and csum information 2256 */ 2257 wtsb(); 2258 2259 /* 2260 * extend the last cylinder group in the original file system 2261 */ 2262 if (grow) { 2263 extendcg(grow_fs_ncg-1); 2264 wtsb(); 2265 } 2266 2267 /* 2268 * Write out the duplicate super blocks to the first 10 2269 * cylinder groups (or fewer, if there are fewer than 10 2270 * cylinder groups). 2271 */ 2272 for (cylno = 0; cylno < sblock.fs_ncg && cylno < 10; cylno++) 2273 awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)), 2274 (int)sbsize, (char *)&sblock, SAVE); 2275 2276 /* 2277 * Now write out duplicate super blocks to the remaining 2278 * cylinder groups. 
In the case of multi-terabyte file 2279 * systems, just write out the super block to the last ten 2280 * cylinder groups (or however many are left). 2281 */ 2282 if (mtb == 'y') { 2283 if (sblock.fs_ncg <= 10) 2284 cylno = sblock.fs_ncg; 2285 else if (sblock.fs_ncg <= 20) 2286 cylno = 10; 2287 else 2288 cylno = sblock.fs_ncg - 10; 2289 } 2290 2291 for (; cylno < sblock.fs_ncg; cylno++) 2292 awtfs(fsbtodb(&sblock, (uint64_t)cgsblock(&sblock, cylno)), 2293 (int)sbsize, (char *)&sblock, SAVE); 2294 2295 /* 2296 * Flush out all the AIO writes we've done. It's not 2297 * necessary to do this explicitly, but it's the only 2298 * way to report any errors from those writes. 2299 */ 2300 flush_writes(); 2301 2302 /* 2303 * set clean flag 2304 */ 2305 if (grow) 2306 sblock.fs_clean = grow_fs_clean; 2307 else 2308 sblock.fs_clean = FSCLEAN; 2309 sblock.fs_time = mkfstime; 2310 sblock.fs_state = FSOKAY - sblock.fs_time; 2311 wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock); 2312 isbad = 0; 2313 2314 if (fsync(fso) == -1) { 2315 saverr = errno; 2316 (void) fprintf(stderr, 2317 gettext("mkfs: fsync failed on write disk: %s\n"), 2318 strerror(saverr)); 2319 /* we're just cleaning up, so keep going */ 2320 } 2321 if (close(fsi) == -1) { 2322 saverr = errno; 2323 (void) fprintf(stderr, 2324 gettext("mkfs: close failed on read disk: %s\n"), 2325 strerror(saverr)); 2326 /* we're just cleaning up, so keep going */ 2327 } 2328 if (close(fso) == -1) { 2329 saverr = errno; 2330 (void) fprintf(stderr, 2331 gettext("mkfs: close failed on write disk: %s\n"), 2332 strerror(saverr)); 2333 /* we're just cleaning up, so keep going */ 2334 } 2335 fsi = fso = -1; 2336 2337 #ifndef STANDALONE 2338 lockexit(0); 2339 #endif 2340 2341 return (0); 2342 } 2343 2344 /* 2345 * Figure out how big the partition we're dealing with is. 
2346 * The value returned is in disk blocks (sectors); 2347 */ 2348 static diskaddr_t 2349 get_max_size(int fd) 2350 { 2351 struct vtoc vtoc; 2352 dk_gpt_t *efi_vtoc; 2353 diskaddr_t slicesize; 2354 2355 int index = read_vtoc(fd, &vtoc); 2356 2357 if (index >= 0) { 2358 label_type = LABEL_TYPE_VTOC; 2359 } else { 2360 if (index == VT_ENOTSUP || index == VT_ERROR) { 2361 /* it might be an EFI label */ 2362 index = efi_alloc_and_read(fd, &efi_vtoc); 2363 label_type = LABEL_TYPE_EFI; 2364 } 2365 } 2366 2367 if (index < 0) { 2368 switch (index) { 2369 case VT_ERROR: 2370 break; 2371 case VT_EIO: 2372 errno = EIO; 2373 break; 2374 case VT_EINVAL: 2375 errno = EINVAL; 2376 } 2377 perror(gettext("Can not determine partition size")); 2378 lockexit(32); 2379 } 2380 2381 if (label_type == LABEL_TYPE_EFI) { 2382 slicesize = efi_vtoc->efi_parts[index].p_size; 2383 efi_free(efi_vtoc); 2384 } else { 2385 /* 2386 * In the vtoc struct, p_size is a 32-bit signed quantity. 2387 * In the dk_gpt struct (efi's version of the vtoc), p_size 2388 * is an unsigned 64-bit quantity. By casting the vtoc's 2389 * psize to an unsigned 32-bit quantity, it will be copied 2390 * to 'slicesize' (an unsigned 64-bit diskaddr_t) without 2391 * sign extension. 2392 */ 2393 2394 slicesize = (uint32_t)vtoc.v_part[index].p_size; 2395 } 2396 2397 dprintf(("DeBuG get_max_size index = %d, p_size = %lld, dolimit = %d\n", 2398 index, slicesize, (slicesize > FS_MAX))); 2399 2400 /* 2401 * The next line limits a UFS file system to the maximum 2402 * supported size. 
2403 */ 2404 2405 if (slicesize > FS_MAX) 2406 return (FS_MAX); 2407 return (slicesize); 2408 } 2409 2410 static long 2411 get_max_track_size(int fd) 2412 { 2413 struct dk_cinfo ci; 2414 long track_size = -1; 2415 2416 if (ioctl(fd, DKIOCINFO, &ci) == 0) { 2417 track_size = ci.dki_maxtransfer * DEV_BSIZE; 2418 } 2419 2420 if ((track_size < 0)) { 2421 int error = 0; 2422 int maxphys; 2423 int gotit = 0; 2424 2425 gotit = fsgetmaxphys(&maxphys, &error); 2426 if (gotit) { 2427 track_size = MIN(MB, maxphys); 2428 } else { 2429 (void) fprintf(stderr, gettext( 2430 "Warning: Could not get system value for maxphys. The value for\n" 2431 "maxcontig will default to 1MB.\n")); 2432 track_size = MB; 2433 } 2434 } 2435 return (track_size); 2436 } 2437 2438 /* 2439 * Initialize a cylinder group. 2440 */ 2441 static void 2442 initcg(int cylno) 2443 { 2444 diskaddr_t cbase, d; 2445 diskaddr_t dlower; /* last data block before cg metadata */ 2446 diskaddr_t dupper; /* first data block after cg metadata */ 2447 diskaddr_t dmax; 2448 int64_t i; 2449 struct csum *cs; 2450 struct dinode *inode_buffer; 2451 int size; 2452 2453 /* 2454 * Variables used to store intermediate results as a part of 2455 * the internal implementation of the cbtocylno() macros. 2456 */ 2457 diskaddr_t bno; /* UFS block number (not sector number) */ 2458 int cbcylno; /* current cylinder number */ 2459 int cbcylno_sect; /* sector offset within cylinder */ 2460 int cbsect_incr; /* amount to increment sector offset */ 2461 2462 /* 2463 * Variables used to store intermediate results as a part of 2464 * the internal implementation of the cbtorpos() macros. 
2465 */ 2466 short *cgblks; /* pointer to array of free blocks in cg */ 2467 int trackrpos; /* tmp variable for rotation position */ 2468 int trackoff; /* offset within a track */ 2469 int trackoff_incr; /* amount to increment trackoff */ 2470 int rpos; /* rotation position of current block */ 2471 int rpos_incr; /* amount to increment rpos per block */ 2472 2473 union cgun *icgun; /* local pointer to a cg summary block */ 2474 #define icg (icgun->cg) 2475 2476 icgun = (union cgun *)getbuf(&cgsumbuf, sizeof (union cgun)); 2477 2478 /* 2479 * Determine block bounds for cylinder group. 2480 * Allow space for super block summary information in first 2481 * cylinder group. 2482 */ 2483 cbase = cgbase(&sblock, cylno); 2484 dmax = cbase + sblock.fs_fpg; 2485 if (dmax > sblock.fs_size) /* last cg may be smaller than normal */ 2486 dmax = sblock.fs_size; 2487 dlower = cgsblock(&sblock, cylno) - cbase; 2488 dupper = cgdmin(&sblock, cylno) - cbase; 2489 if (cylno == 0) 2490 dupper += howmany(sblock.fs_cssize, sblock.fs_fsize); 2491 cs = fscs + cylno; 2492 icg.cg_time = mkfstime; 2493 icg.cg_magic = CG_MAGIC; 2494 icg.cg_cgx = cylno; 2495 /* last one gets whatever's left */ 2496 if (cylno == sblock.fs_ncg - 1) 2497 icg.cg_ncyl = sblock.fs_ncyl - (sblock.fs_cpg * cylno); 2498 else 2499 icg.cg_ncyl = sblock.fs_cpg; 2500 icg.cg_niblk = sblock.fs_ipg; 2501 icg.cg_ndblk = dmax - cbase; 2502 icg.cg_cs.cs_ndir = 0; 2503 icg.cg_cs.cs_nffree = 0; 2504 icg.cg_cs.cs_nbfree = 0; 2505 icg.cg_cs.cs_nifree = 0; 2506 icg.cg_rotor = 0; 2507 icg.cg_frotor = 0; 2508 icg.cg_irotor = 0; 2509 icg.cg_btotoff = &icg.cg_space[0] - (uchar_t *)(&icg.cg_link); 2510 icg.cg_boff = icg.cg_btotoff + sblock.fs_cpg * sizeof (long); 2511 icg.cg_iusedoff = icg.cg_boff + 2512 sblock.fs_cpg * sblock.fs_nrpos * sizeof (short); 2513 icg.cg_freeoff = icg.cg_iusedoff + howmany(sblock.fs_ipg, NBBY); 2514 icg.cg_nextfreeoff = icg.cg_freeoff + 2515 howmany(sblock.fs_cpg * sblock.fs_spc / NSPF(&sblock), NBBY); 2516 for 
(i = 0; i < sblock.fs_frag; i++) { 2517 icg.cg_frsum[i] = 0; 2518 } 2519 bzero((caddr_t)cg_inosused(&icg), icg.cg_freeoff - icg.cg_iusedoff); 2520 icg.cg_cs.cs_nifree += sblock.fs_ipg; 2521 if (cylno == 0) 2522 for (i = 0; i < UFSROOTINO; i++) { 2523 setbit(cg_inosused(&icg), i); 2524 icg.cg_cs.cs_nifree--; 2525 } 2526 2527 /* 2528 * Initialize all the inodes in the cylinder group using 2529 * random numbers. 2530 */ 2531 size = sblock.fs_ipg * sizeof (struct dinode); 2532 inode_buffer = (struct dinode *)getbuf(&inodebuf, size); 2533 2534 for (i = 0; i < sblock.fs_ipg; i++) { 2535 IRANDOMIZE(&(inode_buffer[i].di_ic)); 2536 } 2537 2538 /* 2539 * Write all inodes in a single write for performance. 2540 */ 2541 awtfs(fsbtodb(&sblock, (uint64_t)cgimin(&sblock, cylno)), (int)size, 2542 (char *)inode_buffer, RELEASE); 2543 2544 bzero((caddr_t)cg_blktot(&icg), icg.cg_boff - icg.cg_btotoff); 2545 bzero((caddr_t)cg_blks(&sblock, &icg, 0), 2546 icg.cg_iusedoff - icg.cg_boff); 2547 bzero((caddr_t)cg_blksfree(&icg), icg.cg_nextfreeoff - icg.cg_freeoff); 2548 2549 if (cylno > 0) { 2550 for (d = 0; d < dlower; d += sblock.fs_frag) { 2551 setblock(&sblock, cg_blksfree(&icg), d/sblock.fs_frag); 2552 icg.cg_cs.cs_nbfree++; 2553 cg_blktot(&icg)[cbtocylno(&sblock, d)]++; 2554 cg_blks(&sblock, &icg, cbtocylno(&sblock, d)) 2555 [cbtorpos(&sblock, d)]++; 2556 } 2557 sblock.fs_dsize += dlower; 2558 } 2559 sblock.fs_dsize += icg.cg_ndblk - dupper; 2560 if ((i = dupper % sblock.fs_frag) != 0) { 2561 icg.cg_frsum[sblock.fs_frag - i]++; 2562 for (d = dupper + sblock.fs_frag - i; dupper < d; dupper++) { 2563 setbit(cg_blksfree(&icg), dupper); 2564 icg.cg_cs.cs_nffree++; 2565 } 2566 } 2567 2568 /* 2569 * WARNING: The following code is somewhat confusing, but 2570 * results in a substantial performance improvement in mkfs. 2571 * 2572 * Instead of using cbtocylno() and cbtorpos() macros, we 2573 * keep track of all the intermediate state of those macros 2574 * in some variables. 
This allows simple addition to be 2575 * done to calculate the results as we step through the 2576 * blocks in an orderly fashion instead of the slower 2577 * multiplication and division the macros are forced to 2578 * used so they can support random input. (Multiplication, 2579 * division, and remainder operations typically take about 2580 * 10x as many processor cycles as other operations.) 2581 * 2582 * The basic idea is to take code: 2583 * 2584 * for (x = starting_x; x < max; x++) 2585 * y = (x * c) / z 2586 * 2587 * and rewrite it to take advantage of the fact that 2588 * the variable x is incrementing in an orderly way: 2589 * 2590 * intermediate = starting_x * c 2591 * yval = intermediate / z 2592 * for (x = starting_x; x < max; x++) { 2593 * y = yval; 2594 * intermediate += c 2595 * if (intermediate > z) { 2596 * yval++; 2597 * intermediate -= z 2598 * } 2599 * } 2600 * 2601 * Performance has improved as much as 4X using this code. 2602 */ 2603 2604 /* 2605 * Initialize the starting points for all the cbtocylno() 2606 * macro variables and figure out the increments needed each 2607 * time through the loop. 2608 */ 2609 cbcylno_sect = dupper * NSPF(&sblock); 2610 cbsect_incr = sblock.fs_frag * NSPF(&sblock); 2611 cbcylno = cbcylno_sect / sblock.fs_spc; 2612 cbcylno_sect %= sblock.fs_spc; 2613 cgblks = cg_blks(&sblock, &icg, cbcylno); 2614 bno = dupper / sblock.fs_frag; 2615 2616 /* 2617 * Initialize the starting points for all the cbtorpos() 2618 * macro variables and figure out the increments needed each 2619 * time through the loop. 2620 * 2621 * It's harder to simplify the cbtorpos() macro if there were 2622 * alternate sectors specified (or if they previously existed 2623 * in the growfs case). Since this is rare, we just revert to 2624 * using the macros in this case and skip the variable setup. 
2625 */ 2626 if (!spc_flag) { 2627 trackrpos = (cbcylno_sect % sblock.fs_nsect) * sblock.fs_nrpos; 2628 rpos = trackrpos / sblock.fs_nsect; 2629 trackoff = trackrpos % sblock.fs_nsect; 2630 trackoff_incr = cbsect_incr * sblock.fs_nrpos; 2631 rpos_incr = (trackoff_incr / sblock.fs_nsect) % sblock.fs_nrpos; 2632 trackoff_incr = trackoff_incr % sblock.fs_nsect; 2633 } 2634 2635 /* 2636 * Loop through all the blocks, marking them free and 2637 * updating totals kept in the superblock and cg summary. 2638 */ 2639 for (d = dupper; d + sblock.fs_frag <= dmax - cbase; ) { 2640 setblock(&sblock, cg_blksfree(&icg), bno); 2641 icg.cg_cs.cs_nbfree++; 2642 2643 cg_blktot(&icg)[cbcylno]++; 2644 2645 if (!spc_flag) 2646 cgblks[rpos]++; 2647 else 2648 cg_blks(&sblock, &icg, cbtocylno(&sblock, d)) 2649 [cbtorpos(&sblock, d)]++; 2650 2651 d += sblock.fs_frag; 2652 bno++; 2653 2654 /* 2655 * Increment the sector offset within the cylinder 2656 * for the cbtocylno() macro reimplementation. If 2657 * we're beyond the end of the cylinder, update the 2658 * cylinder number, calculate the offset in the 2659 * new cylinder, and update the cgblks pointer 2660 * to the next rotational position. 2661 */ 2662 cbcylno_sect += cbsect_incr; 2663 if (cbcylno_sect >= sblock.fs_spc) { 2664 cbcylno++; 2665 cbcylno_sect -= sblock.fs_spc; 2666 cgblks += sblock.fs_nrpos; 2667 } 2668 2669 /* 2670 * If there aren't alternate sectors, increment the 2671 * rotational position variables for the cbtorpos() 2672 * reimplementation. Note that we potentially 2673 * increment rpos twice. Once by rpos_incr, and one 2674 * more time when we wrap to a new track because 2675 * trackoff >= fs_nsect. 
2676 */ 2677 if (!spc_flag) { 2678 trackoff += trackoff_incr; 2679 rpos += rpos_incr; 2680 if (trackoff >= sblock.fs_nsect) { 2681 trackoff -= sblock.fs_nsect; 2682 rpos++; 2683 } 2684 if (rpos >= sblock.fs_nrpos) 2685 rpos -= sblock.fs_nrpos; 2686 } 2687 } 2688 2689 if (d < dmax - cbase) { 2690 icg.cg_frsum[dmax - cbase - d]++; 2691 for (; d < dmax - cbase; d++) { 2692 setbit(cg_blksfree(&icg), d); 2693 icg.cg_cs.cs_nffree++; 2694 } 2695 } 2696 sblock.fs_cstotal.cs_ndir += icg.cg_cs.cs_ndir; 2697 sblock.fs_cstotal.cs_nffree += icg.cg_cs.cs_nffree; 2698 sblock.fs_cstotal.cs_nbfree += icg.cg_cs.cs_nbfree; 2699 sblock.fs_cstotal.cs_nifree += icg.cg_cs.cs_nifree; 2700 *cs = icg.cg_cs; 2701 awtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, cylno)), 2702 sblock.fs_bsize, (char *)&icg, RELEASE); 2703 } 2704 2705 /* 2706 * initialize the file system 2707 */ 2708 struct inode node; 2709 2710 #define LOSTDIR 2711 #ifdef LOSTDIR 2712 #define PREDEFDIR 3 2713 #else 2714 #define PREDEFDIR 2 2715 #endif 2716 2717 struct direct root_dir[] = { 2718 { UFSROOTINO, sizeof (struct direct), 1, "." }, 2719 { UFSROOTINO, sizeof (struct direct), 2, ".." }, 2720 #ifdef LOSTDIR 2721 { LOSTFOUNDINO, sizeof (struct direct), 10, "lost+found" }, 2722 #endif 2723 }; 2724 #ifdef LOSTDIR 2725 struct direct lost_found_dir[] = { 2726 { LOSTFOUNDINO, sizeof (struct direct), 1, "." }, 2727 { UFSROOTINO, sizeof (struct direct), 2, ".." 
}, 2728 { 0, DIRBLKSIZ, 0, 0 }, 2729 }; 2730 #endif 2731 char buf[MAXBSIZE]; 2732 2733 static void 2734 fsinit() 2735 { 2736 int i; 2737 2738 2739 /* 2740 * initialize the node 2741 */ 2742 node.i_atime = mkfstime; 2743 node.i_mtime = mkfstime; 2744 node.i_ctime = mkfstime; 2745 #ifdef LOSTDIR 2746 /* 2747 * create the lost+found directory 2748 */ 2749 (void) makedir(lost_found_dir, 2); 2750 for (i = DIRBLKSIZ; i < sblock.fs_bsize; i += DIRBLKSIZ) { 2751 bcopy(&lost_found_dir[2], &buf[i], DIRSIZ(&lost_found_dir[2])); 2752 } 2753 node.i_number = LOSTFOUNDINO; 2754 node.i_smode = node.i_mode = IFDIR | 0700; 2755 node.i_nlink = 2; 2756 node.i_size = sblock.fs_bsize; 2757 node.i_db[0] = alloc((int)node.i_size, node.i_mode); 2758 node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size)); 2759 IRANDOMIZE(&node.i_ic); 2760 wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), (int)node.i_size, buf); 2761 iput(&node); 2762 #endif 2763 /* 2764 * create the root directory 2765 */ 2766 node.i_number = UFSROOTINO; 2767 node.i_mode = node.i_smode = IFDIR | UMASK; 2768 node.i_nlink = PREDEFDIR; 2769 node.i_size = makedir(root_dir, PREDEFDIR); 2770 node.i_db[0] = alloc(sblock.fs_fsize, node.i_mode); 2771 /* i_size < 2GB because we are initializing the file system */ 2772 node.i_blocks = btodb(fragroundup(&sblock, (int)node.i_size)); 2773 IRANDOMIZE(&node.i_ic); 2774 wtfs(fsbtodb(&sblock, (uint64_t)node.i_db[0]), sblock.fs_fsize, buf); 2775 iput(&node); 2776 } 2777 2778 /* 2779 * construct a set of directory entries in "buf". 2780 * return size of directory. 
2781 */ 2782 static int 2783 makedir(struct direct *protodir, int entries) 2784 { 2785 char *cp; 2786 int i; 2787 ushort_t spcleft; 2788 2789 spcleft = DIRBLKSIZ; 2790 for (cp = buf, i = 0; i < entries - 1; i++) { 2791 protodir[i].d_reclen = DIRSIZ(&protodir[i]); 2792 bcopy(&protodir[i], cp, protodir[i].d_reclen); 2793 cp += protodir[i].d_reclen; 2794 spcleft -= protodir[i].d_reclen; 2795 } 2796 protodir[i].d_reclen = spcleft; 2797 bcopy(&protodir[i], cp, DIRSIZ(&protodir[i])); 2798 return (DIRBLKSIZ); 2799 } 2800 2801 /* 2802 * allocate a block or frag 2803 */ 2804 static daddr32_t 2805 alloc(int size, int mode) 2806 { 2807 int i, frag; 2808 daddr32_t d; 2809 2810 rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize, 2811 (char *)&acg); 2812 if (acg.cg_magic != CG_MAGIC) { 2813 (void) fprintf(stderr, gettext("cg 0: bad magic number\n")); 2814 lockexit(32); 2815 } 2816 if (acg.cg_cs.cs_nbfree == 0) { 2817 (void) fprintf(stderr, 2818 gettext("first cylinder group ran out of space\n")); 2819 lockexit(32); 2820 } 2821 for (d = 0; d < acg.cg_ndblk; d += sblock.fs_frag) 2822 if (isblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag)) 2823 goto goth; 2824 (void) fprintf(stderr, 2825 gettext("internal error: can't find block in cyl 0\n")); 2826 lockexit(32); 2827 goth: 2828 clrblock(&sblock, cg_blksfree(&acg), d / sblock.fs_frag); 2829 acg.cg_cs.cs_nbfree--; 2830 sblock.fs_cstotal.cs_nbfree--; 2831 fscs[0].cs_nbfree--; 2832 if (mode & IFDIR) { 2833 acg.cg_cs.cs_ndir++; 2834 sblock.fs_cstotal.cs_ndir++; 2835 fscs[0].cs_ndir++; 2836 } 2837 cg_blktot(&acg)[cbtocylno(&sblock, d)]--; 2838 cg_blks(&sblock, &acg, cbtocylno(&sblock, d))[cbtorpos(&sblock, d)]--; 2839 if (size != sblock.fs_bsize) { 2840 frag = howmany(size, sblock.fs_fsize); 2841 fscs[0].cs_nffree += sblock.fs_frag - frag; 2842 sblock.fs_cstotal.cs_nffree += sblock.fs_frag - frag; 2843 acg.cg_cs.cs_nffree += sblock.fs_frag - frag; 2844 acg.cg_frsum[sblock.fs_frag - frag]++; 2845 for (i = frag; i < 
sblock.fs_frag; i++) 2846 setbit(cg_blksfree(&acg), d + i); 2847 } 2848 wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize, 2849 (char *)&acg); 2850 return (d); 2851 } 2852 2853 /* 2854 * Allocate an inode on the disk 2855 */ 2856 static void 2857 iput(struct inode *ip) 2858 { 2859 struct dinode buf[MAXINOPB]; 2860 diskaddr_t d; 2861 2862 rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize, 2863 (char *)&acg); 2864 if (acg.cg_magic != CG_MAGIC) { 2865 (void) fprintf(stderr, gettext("cg 0: bad magic number\n")); 2866 lockexit(32); 2867 } 2868 acg.cg_cs.cs_nifree--; 2869 setbit(cg_inosused(&acg), ip->i_number); 2870 wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, 0)), sblock.fs_cgsize, 2871 (char *)&acg); 2872 sblock.fs_cstotal.cs_nifree--; 2873 fscs[0].cs_nifree--; 2874 if ((int)ip->i_number >= sblock.fs_ipg * sblock.fs_ncg) { 2875 (void) fprintf(stderr, 2876 gettext("fsinit: inode value out of range (%d).\n"), 2877 ip->i_number); 2878 lockexit(32); 2879 } 2880 d = fsbtodb(&sblock, (uint64_t)itod(&sblock, (int)ip->i_number)); 2881 rdfs(d, sblock.fs_bsize, (char *)buf); 2882 buf[itoo(&sblock, (int)ip->i_number)].di_ic = ip->i_ic; 2883 wtfs(d, sblock.fs_bsize, (char *)buf); 2884 } 2885 2886 /* 2887 * getbuf() -- Get a buffer for use in an AIO operation. Buffer 2888 * is zero'd the first time returned, left with whatever 2889 * was in memory after that. This function actually gets 2890 * enough memory the first time it's called to support 2891 * MAXBUF buffers like a slab allocator. When all the 2892 * buffers are in use, it waits for an aio to complete 2893 * and make a buffer available. 2894 * 2895 * Never returns an error. Either succeeds or exits. 
2896 */ 2897 static char * 2898 getbuf(bufhdr *bufhead, int size) 2899 { 2900 bufhdr *pbuf; 2901 bufhdr *prev; 2902 int i; 2903 int buf_size, max_bufs; 2904 2905 /* 2906 * Initialize all the buffers 2907 */ 2908 if (bufhead->head == NULL) { 2909 /* 2910 * round up the size of our buffer header to a 2911 * 16 byte boundary so the address we return to 2912 * the caller is "suitably aligned". 2913 */ 2914 bufhdrsize = (sizeof (bufhdr) + 15) & ~15; 2915 2916 /* 2917 * Add in our header to the buffer and round it all up to 2918 * a 16 byte boundry so each member of the slab is aligned. 2919 */ 2920 buf_size = (size + bufhdrsize + 15) & ~15; 2921 2922 /* 2923 * Limit number of buffers to lesser of MAXBUFMEM's worth 2924 * or MAXBUF, whichever is less. 2925 */ 2926 max_bufs = MAXBUFMEM / buf_size; 2927 if (max_bufs > MAXBUF) 2928 max_bufs = MAXBUF; 2929 2930 pbuf = (bufhdr *)calloc(max_bufs, buf_size); 2931 if (pbuf == NULL) { 2932 perror("calloc"); 2933 lockexit(32); 2934 } 2935 2936 bufhead->head = bufhead; 2937 prev = bufhead; 2938 for (i = 0; i < max_bufs; i++) { 2939 pbuf->head = bufhead; 2940 prev->next = pbuf; 2941 prev = pbuf; 2942 pbuf = (bufhdr *)((char *)pbuf + buf_size); 2943 } 2944 } 2945 2946 /* 2947 * Get an available buffer, waiting for I/O if necessary 2948 */ 2949 wait_for_write(NOBLOCK); 2950 while (bufhead->next == NULL) 2951 wait_for_write(BLOCK); 2952 2953 /* 2954 * Take the buffer off the list 2955 */ 2956 pbuf = bufhead->next; 2957 bufhead->next = pbuf->next; 2958 pbuf->next = NULL; 2959 2960 /* 2961 * return the empty buffer space just past the header 2962 */ 2963 return ((char *)pbuf + bufhdrsize); 2964 } 2965 2966 /* 2967 * freebuf() -- Free a buffer gotten previously through getbuf. 2968 * Puts the buffer back on the appropriate list for 2969 * later use. Never calls free(). 2970 * 2971 * Assumes that SIGINT is blocked. 
2972 */ 2973 static void 2974 freebuf(char *buf) 2975 { 2976 bufhdr *pbuf; 2977 bufhdr *bufhead; 2978 2979 /* 2980 * get the header for this buffer 2981 */ 2982 pbuf = (bufhdr *)(buf - bufhdrsize); 2983 2984 /* 2985 * Put it back on the list of available buffers 2986 */ 2987 bufhead = pbuf->head; 2988 pbuf->next = bufhead->next; 2989 bufhead->next = pbuf; 2990 } 2991 2992 /* 2993 * freetrans() -- Free a transaction gotten previously through getaiop. 2994 * Puts the transaction struct back on the appropriate list for 2995 * later use. Never calls free(). 2996 * 2997 * Assumes that SIGINT is blocked. 2998 */ 2999 static void 3000 freetrans(aio_trans *transp) 3001 { 3002 /* 3003 * free the buffer associated with this AIO if needed 3004 */ 3005 if (transp->release == RELEASE) 3006 freebuf(transp->buffer); 3007 3008 /* 3009 * Put transaction on the free list 3010 */ 3011 transp->next = results.trans; 3012 results.trans = transp; 3013 } 3014 3015 /* 3016 * wait_for_write() -- Wait for an aio write to complete. Return 3017 * the transaction structure for that write. 3018 * 3019 * Blocks SIGINT if necessary. 3020 */ 3021 aio_trans * 3022 wait_for_write(int block) 3023 { 3024 aio_trans *transp; 3025 aio_result_t *resultp; 3026 static struct timeval zero_wait = { 0, 0 }; 3027 sigset_t old_mask; 3028 3029 /* 3030 * If we know there aren't any outstanding transactions, just return 3031 */ 3032 if (results.outstanding == 0) 3033 return ((aio_trans *) 0); 3034 3035 block_sigint(&old_mask); 3036 3037 resultp = aiowait(block ? NULL : &zero_wait); 3038 if (resultp == NULL || 3039 (resultp == (aio_result_t *)-1 && errno == EINVAL)) { 3040 unblock_sigint(&old_mask); 3041 return ((aio_trans *) 0); 3042 } 3043 3044 results.outstanding--; 3045 transp = (aio_trans *)resultp; 3046 3047 if (resultp->aio_return != transp->size) { 3048 if (resultp->aio_return == -1) { 3049 /* 3050 * The aiowrite() may have failed because the 3051 * kernel didn't have enough memory to do the job. 
3052 * Flush all pending writes and try a normal 3053 * write(). wtfs_breakup() will call exit if it 3054 * fails, so we don't worry about errors here. 3055 */ 3056 flush_writes(); 3057 wtfs_breakup(transp->bno, transp->size, transp->buffer); 3058 } else { 3059 (void) fprintf(stderr, gettext( 3060 "short write (%d of %d bytes) on sector %lld\n"), 3061 resultp->aio_return, transp->size, 3062 transp->bno); 3063 /* 3064 * Don't unblock SIGINT, to avoid potential 3065 * looping due to queued interrupts and 3066 * error handling. 3067 */ 3068 lockexit(32); 3069 } 3070 } 3071 3072 resultp->aio_return = 0; 3073 freetrans(transp); 3074 unblock_sigint(&old_mask); 3075 return (transp); 3076 } 3077 3078 /* 3079 * flush_writes() -- flush all the outstanding aio writes. 3080 */ 3081 static void 3082 flush_writes(void) 3083 { 3084 while (wait_for_write(BLOCK)) 3085 ; 3086 } 3087 3088 /* 3089 * get_aiop() -- find and return an aio_trans structure on which a new 3090 * aio can be done. Blocks on aiowait() if needed. Reaps 3091 * all outstanding completed aio's. 3092 * 3093 * Assumes that SIGINT is blocked. 3094 */ 3095 aio_trans * 3096 get_aiop() 3097 { 3098 int i; 3099 aio_trans *transp; 3100 aio_trans *prev; 3101 3102 /* 3103 * initialize aio stuff 3104 */ 3105 if (!aio_inited) { 3106 aio_inited = 1; 3107 3108 results.maxpend = 0; 3109 results.outstanding = 0; 3110 results.max = MAXAIO; 3111 3112 results.trans = (aio_trans *)calloc(results.max, 3113 sizeof (aio_trans)); 3114 if (results.trans == NULL) { 3115 perror("calloc"); 3116 lockexit(32); 3117 } 3118 3119 /* 3120 * Initialize the linked list of aio transaction 3121 * structures. Note that the final "next" pointer 3122 * will be NULL since we got the buffer from calloc(). 
3123 */ 3124 prev = results.trans; 3125 for (i = 1; i < results.max; i++) { 3126 prev->next = &(results.trans[i]); 3127 prev = prev->next; 3128 } 3129 } 3130 3131 wait_for_write(NOBLOCK); 3132 while (results.trans == NULL) 3133 wait_for_write(BLOCK); 3134 transp = results.trans; 3135 results.trans = results.trans->next; 3136 3137 transp->next = 0; 3138 transp->resultbuf.aio_return = AIO_INPROGRESS; 3139 return (transp); 3140 } 3141 3142 /* 3143 * read a block from the file system 3144 */ 3145 static void 3146 rdfs(diskaddr_t bno, int size, char *bf) 3147 { 3148 int n, saverr; 3149 3150 /* 3151 * In case we need any data that's pending in an aiowrite(), 3152 * we wait for them all to complete before doing a read. 3153 */ 3154 flush_writes(); 3155 3156 /* 3157 * Note: the llseek() can succeed, even if the offset is out of range. 3158 * It's not until the file i/o operation (the read()) that one knows 3159 * for sure if the raw device can handle the offset. 3160 */ 3161 if (llseek(fsi, (offset_t)bno * sectorsize, 0) < 0) { 3162 saverr = errno; 3163 (void) fprintf(stderr, 3164 gettext("seek error on sector %lld: %s\n"), 3165 bno, strerror(saverr)); 3166 lockexit(32); 3167 } 3168 n = read(fsi, bf, size); 3169 if (n != size) { 3170 saverr = errno; 3171 if (n == -1) 3172 (void) fprintf(stderr, 3173 gettext("read error on sector %lld: %s\n"), 3174 bno, strerror(saverr)); 3175 else 3176 (void) fprintf(stderr, gettext( 3177 "short read (%d of %d bytes) on sector %lld\n"), 3178 n, size, bno); 3179 lockexit(32); 3180 } 3181 } 3182 3183 /* 3184 * write a block to the file system 3185 */ 3186 static void 3187 wtfs(diskaddr_t bno, int size, char *bf) 3188 { 3189 int n, saverr; 3190 3191 if (fso == -1) 3192 return; 3193 3194 /* 3195 * Note: the llseek() can succeed, even if the offset is out of range. 3196 * It's not until the file i/o operation (the write()) that one knows 3197 * for sure if the raw device can handle the offset. 
3198 */ 3199 if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) { 3200 saverr = errno; 3201 (void) fprintf(stderr, 3202 gettext("seek error on sector %lld: %s\n"), 3203 bno, strerror(saverr)); 3204 lockexit(32); 3205 } 3206 if (Nflag) 3207 return; 3208 n = write(fso, bf, size); 3209 if (n != size) { 3210 saverr = errno; 3211 if (n == -1) 3212 (void) fprintf(stderr, 3213 gettext("write error on sector %lld: %s\n"), 3214 bno, strerror(saverr)); 3215 else 3216 (void) fprintf(stderr, gettext( 3217 "short write (%d of %d bytes) on sector %lld\n"), 3218 n, size, bno); 3219 lockexit(32); 3220 } 3221 } 3222 3223 /* 3224 * write a block to the file system -- buffered with aio 3225 */ 3226 static void 3227 awtfs(diskaddr_t bno, int size, char *bf, int release) 3228 { 3229 int n; 3230 aio_trans *transp; 3231 sigset_t old_mask; 3232 3233 if (fso == -1) 3234 return; 3235 3236 /* 3237 * We need to keep things consistent if we get interrupted, 3238 * so defer any expected interrupts for the time being. 3239 */ 3240 block_sigint(&old_mask); 3241 3242 if (Nflag) { 3243 if (release == RELEASE) 3244 freebuf(bf); 3245 } else { 3246 transp = get_aiop(); 3247 transp->bno = bno; 3248 transp->buffer = bf; 3249 transp->size = size; 3250 transp->release = release; 3251 3252 n = aiowrite(fso, bf, size, (off_t)bno * sectorsize, 3253 SEEK_SET, &transp->resultbuf); 3254 3255 if (n < 0) { 3256 /* 3257 * The aiowrite() may have failed because the 3258 * kernel didn't have enough memory to do the job. 3259 * Flush all pending writes and try a normal 3260 * write(). wtfs_breakup() will call exit if it 3261 * fails, so we don't worry about errors here. 3262 */ 3263 flush_writes(); 3264 wtfs_breakup(transp->bno, transp->size, transp->buffer); 3265 freetrans(transp); 3266 } else { 3267 /* 3268 * Keep track of our pending writes. 
3269 */ 3270 results.outstanding++; 3271 if (results.outstanding > results.maxpend) 3272 results.maxpend = results.outstanding; 3273 } 3274 } 3275 3276 unblock_sigint(&old_mask); 3277 } 3278 3279 3280 /* 3281 * write a block to the file system, but break it up into sbsize 3282 * chunks to avoid forcing a large amount of memory to be locked down. 3283 * Only used as a fallback when an aio write has failed. 3284 */ 3285 static void 3286 wtfs_breakup(diskaddr_t bno, int size, char *bf) 3287 { 3288 int n, saverr; 3289 int wsize; 3290 int block_incr = sbsize / sectorsize; 3291 3292 if (size < sbsize) 3293 wsize = size; 3294 else 3295 wsize = sbsize; 3296 3297 n = 0; 3298 while (size) { 3299 /* 3300 * Note: the llseek() can succeed, even if the offset is 3301 * out of range. It's not until the file i/o operation 3302 * (the write()) that one knows for sure if the raw device 3303 * can handle the offset. 3304 */ 3305 if (llseek(fso, (offset_t)bno * sectorsize, 0) < 0) { 3306 saverr = errno; 3307 (void) fprintf(stderr, 3308 gettext("seek error on sector %lld: %s\n"), 3309 bno, strerror(saverr)); 3310 lockexit(32); 3311 } 3312 3313 n = write(fso, bf, wsize); 3314 if (n == -1) { 3315 saverr = errno; 3316 (void) fprintf(stderr, 3317 gettext("write error on sector %lld: %s\n"), 3318 bno, strerror(saverr)); 3319 lockexit(32); 3320 } 3321 if (n != wsize) { 3322 saverr = errno; 3323 (void) fprintf(stderr, gettext( 3324 "short write (%d of %d bytes) on sector %lld\n"), 3325 n, size, bno); 3326 lockexit(32); 3327 } 3328 3329 bno += block_incr; 3330 bf += wsize; 3331 size -= wsize; 3332 if (size < wsize) 3333 wsize = size; 3334 } 3335 } 3336 3337 3338 /* 3339 * check if a block is available 3340 */ 3341 static int 3342 isblock(struct fs *fs, unsigned char *cp, int h) 3343 { 3344 unsigned char mask; 3345 3346 switch (fs->fs_frag) { 3347 case 8: 3348 return (cp[h] == 0xff); 3349 case 4: 3350 mask = 0x0f << ((h & 0x1) << 2); 3351 return ((cp[h >> 1] & mask) == mask); 3352 case 2: 3353 
mask = 0x03 << ((h & 0x3) << 1); 3354 return ((cp[h >> 2] & mask) == mask); 3355 case 1: 3356 mask = 0x01 << (h & 0x7); 3357 return ((cp[h >> 3] & mask) == mask); 3358 default: 3359 (void) fprintf(stderr, "isblock bad fs_frag %d\n", fs->fs_frag); 3360 return (0); 3361 } 3362 } 3363 3364 /* 3365 * take a block out of the map 3366 */ 3367 static void 3368 clrblock(struct fs *fs, unsigned char *cp, int h) 3369 { 3370 switch ((fs)->fs_frag) { 3371 case 8: 3372 cp[h] = 0; 3373 return; 3374 case 4: 3375 cp[h >> 1] &= ~(0x0f << ((h & 0x1) << 2)); 3376 return; 3377 case 2: 3378 cp[h >> 2] &= ~(0x03 << ((h & 0x3) << 1)); 3379 return; 3380 case 1: 3381 cp[h >> 3] &= ~(0x01 << (h & 0x7)); 3382 return; 3383 default: 3384 (void) fprintf(stderr, 3385 gettext("clrblock: bad fs_frag value %d\n"), fs->fs_frag); 3386 return; 3387 } 3388 } 3389 3390 /* 3391 * put a block into the map 3392 */ 3393 static void 3394 setblock(struct fs *fs, unsigned char *cp, int h) 3395 { 3396 switch (fs->fs_frag) { 3397 case 8: 3398 cp[h] = 0xff; 3399 return; 3400 case 4: 3401 cp[h >> 1] |= (0x0f << ((h & 0x1) << 2)); 3402 return; 3403 case 2: 3404 cp[h >> 2] |= (0x03 << ((h & 0x3) << 1)); 3405 return; 3406 case 1: 3407 cp[h >> 3] |= (0x01 << (h & 0x7)); 3408 return; 3409 default: 3410 (void) fprintf(stderr, 3411 gettext("setblock: bad fs_frag value %d\n"), fs->fs_frag); 3412 return; 3413 } 3414 } 3415 3416 static void 3417 usage() 3418 { 3419 (void) fprintf(stderr, 3420 gettext("ufs usage: mkfs [-F FSType] [-V] [-m] [-o options] " 3421 "special " /* param 0 */ 3422 "size(sectors) \\ \n")); /* param 1 */ 3423 (void) fprintf(stderr, 3424 "[nsect " /* param 2 */ 3425 "ntrack " /* param 3 */ 3426 "bsize " /* param 4 */ 3427 "fragsize " /* param 5 */ 3428 "cpg " /* param 6 */ 3429 "free " /* param 7 */ 3430 "rps " /* param 8 */ 3431 "nbpi " /* param 9 */ 3432 "opt " /* param 10 */ 3433 "apc " /* param 11 */ 3434 "gap " /* param 12 */ 3435 "nrpos " /* param 13 */ 3436 "maxcontig " /* param 14 */ 3437 
"mtb]\n"); /* param 15 */ 3438 (void) fprintf(stderr, 3439 gettext(" -m : dump fs cmd line used to make this partition\n" 3440 " -V :print this command line and return\n" 3441 " -o :ufs options: :nsect=%d,ntrack=%d,bsize=%d,fragsize=%d\n" 3442 " -o :ufs options: :cgsize=%d,free=%d,rps=%d,nbpi=%d,opt=%c\n" 3443 " -o :ufs options: :apc=%d,gap=%d,nrpos=%d,maxcontig=%d\n" 3444 " -o :ufs options: :mtb=%c,calcsb,calcbinsb\n" 3445 "NOTE that all -o suboptions: must be separated only by commas so as to\n" 3446 "be parsed as a single argument\n"), 3447 nsect, ntrack, bsize, fragsize, cpg, sblock.fs_minfree, rps, 3448 nbpi, opt, apc, (rotdelay == -1) ? 0 : rotdelay, 3449 sblock.fs_nrpos, maxcontig, mtb); 3450 lockexit(32); 3451 } 3452 3453 /*ARGSUSED*/ 3454 static void 3455 dump_fscmd(char *fsys, int fsi) 3456 { 3457 int64_t used, bpcg, inospercg; 3458 int64_t nbpi; 3459 uint64_t nbytes64; 3460 3461 bzero((char *)&sblock, sizeof (sblock)); 3462 rdfs((diskaddr_t)SBLOCK, SBSIZE, (char *)&sblock); 3463 3464 /* 3465 * ensure a valid file system and if not, exit with error or else 3466 * we will end up computing block numbers etc and dividing by zero 3467 * which will cause floating point errors in this routine. 
3468 */ 3469 3470 if ((sblock.fs_magic != FS_MAGIC) && 3471 (sblock.fs_magic != MTB_UFS_MAGIC)) { 3472 (void) fprintf(stderr, gettext( 3473 "[not currently a valid file system - bad superblock]\n")); 3474 lockexit(32); 3475 } 3476 3477 if (sblock.fs_magic == FS_MAGIC && 3478 (sblock.fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 && 3479 sblock.fs_version != UFS_VERSION_MIN)) { 3480 (void) fprintf(stderr, gettext( 3481 "Unknown version of UFS format: %d\n"), sblock.fs_version); 3482 lockexit(32); 3483 } 3484 3485 if (sblock.fs_magic == MTB_UFS_MAGIC && 3486 (sblock.fs_version > MTB_UFS_VERSION_1 || 3487 sblock.fs_version < MTB_UFS_VERSION_MIN)) { 3488 (void) fprintf(stderr, gettext( 3489 "Unknown version of UFS format: %d\n"), sblock.fs_version); 3490 lockexit(32); 3491 } 3492 3493 /* 3494 * Compute a reasonable nbpi value. 3495 * The algorithm for "used" is copied from code 3496 * in main() verbatim. 3497 * The nbpi equation is taken from main where the 3498 * fs_ipg value is set for the last time. The INOPB(...) - 1 3499 * is used to account for the roundup. 3500 * The problem is that a range of nbpi values map to 3501 * the same file system layout. So it is not possible 3502 * to calculate the exact value specified when the file 3503 * system was created. So instead we determine the top 3504 * end of the range of values. 3505 */ 3506 bpcg = sblock.fs_spc * sectorsize; 3507 inospercg = (int64_t)roundup(bpcg / sizeof (struct dinode), 3508 INOPB(&sblock)); 3509 if (inospercg > MAXIpG(&sblock)) 3510 inospercg = MAXIpG(&sblock); 3511 used = (int64_t) 3512 (sblock.fs_iblkno + inospercg / INOPF(&sblock)) * NSPF(&sblock); 3513 used *= sectorsize; 3514 nbytes64 = (uint64_t)sblock.fs_cpg * bpcg - used; 3515 3516 /* 3517 * The top end of the range of values for nbpi may not be 3518 * a valid command line value for mkfs. Report the bottom 3519 * end instead. 
3520 */ 3521 nbpi = (int64_t)(nbytes64 / (sblock.fs_ipg)); 3522 3523 (void) fprintf(stdout, gettext("mkfs -F ufs -o "), fsys); 3524 (void) fprintf(stdout, "nsect=%d,ntrack=%d,", 3525 sblock.fs_nsect, sblock.fs_ntrak); 3526 (void) fprintf(stdout, "bsize=%d,fragsize=%d,cgsize=%d,free=%d,", 3527 sblock.fs_bsize, sblock.fs_fsize, sblock.fs_cpg, sblock.fs_minfree); 3528 (void) fprintf(stdout, "rps=%d,nbpi=%lld,opt=%c,apc=%d,gap=%d,", 3529 sblock.fs_rps, nbpi, (sblock.fs_optim == FS_OPTSPACE) ? 's' : 't', 3530 (sblock.fs_ntrak * sblock.fs_nsect) - sblock.fs_spc, 3531 sblock.fs_rotdelay); 3532 (void) fprintf(stdout, "nrpos=%d,maxcontig=%d,mtb=%c ", 3533 sblock.fs_nrpos, sblock.fs_maxcontig, 3534 ((sblock.fs_magic == MTB_UFS_MAGIC) ? 'y' : 'n')); 3535 (void) fprintf(stdout, "%s %lld\n", fsys, 3536 fsbtodb(&sblock, sblock.fs_size)); 3537 3538 bzero((char *)&sblock, sizeof (sblock)); 3539 } 3540 3541 /* number ************************************************************* */ 3542 /* */ 3543 /* Convert a numeric string arg to binary */ 3544 /* */ 3545 /* Args: d_value - default value, if have parse error */ 3546 /* param - the name of the argument, for error messages */ 3547 /* flags - parser state and what's allowed in the arg */ 3548 /* Global arg: string - pointer to command arg */ 3549 /* */ 3550 /* Valid forms: 123 | 123k | 123*123 | 123x123 */ 3551 /* */ 3552 /* Return: converted number */ 3553 /* */ 3554 /* ******************************************************************** */ 3555 3556 static uint64_t 3557 number(uint64_t d_value, char *param, int flags) 3558 { 3559 char *cs; 3560 uint64_t n, t; 3561 uint64_t cut = BIG / 10; /* limit to avoid overflow */ 3562 int minus = 0; 3563 3564 cs = string; 3565 if (*cs == '-') { 3566 minus = 1; 3567 cs += 1; 3568 } 3569 if ((*cs < '0') || (*cs > '9')) { 3570 goto bail_out; 3571 } 3572 n = 0; 3573 while ((*cs >= '0') && (*cs <= '9') && (n <= cut)) { 3574 n = n*10 + *cs++ - '0'; 3575 } 3576 if (minus) 3577 n = -n; 3578 for (;;) { 
3579 switch (*cs++) { 3580 case 'k': 3581 if (flags & ALLOW_END_ONLY) 3582 goto bail_out; 3583 if (n > (BIG / 1024)) 3584 goto overflow; 3585 n *= 1024; 3586 continue; 3587 3588 case '*': 3589 case 'x': 3590 if (flags & ALLOW_END_ONLY) 3591 goto bail_out; 3592 string = cs; 3593 t = number(d_value, param, flags); 3594 if (n > (BIG / t)) 3595 goto overflow; 3596 n *= t; 3597 cs = string + 1; /* adjust for -- below */ 3598 3599 /* recursion has read rest of expression */ 3600 /* FALLTHROUGH */ 3601 3602 case ',': 3603 case '\0': 3604 cs--; 3605 string = cs; 3606 return (n); 3607 3608 case '%': 3609 if (flags & ALLOW_END_ONLY) 3610 goto bail_out; 3611 if (flags & ALLOW_PERCENT) { 3612 flags &= ~ALLOW_PERCENT; 3613 flags |= ALLOW_END_ONLY; 3614 continue; 3615 } 3616 goto bail_out; 3617 3618 case 'm': 3619 if (flags & ALLOW_END_ONLY) 3620 goto bail_out; 3621 if (flags & ALLOW_MS1) { 3622 flags &= ~ALLOW_MS1; 3623 flags |= ALLOW_MS2; 3624 continue; 3625 } 3626 goto bail_out; 3627 3628 case 's': 3629 if (flags & ALLOW_END_ONLY) 3630 goto bail_out; 3631 if (flags & ALLOW_MS2) { 3632 flags &= ~ALLOW_MS2; 3633 flags |= ALLOW_END_ONLY; 3634 continue; 3635 } 3636 goto bail_out; 3637 3638 case '0': case '1': case '2': case '3': case '4': 3639 case '5': case '6': case '7': case '8': case '9': 3640 overflow: 3641 (void) fprintf(stderr, 3642 gettext("mkfs: value for %s overflowed\n"), 3643 param); 3644 while ((*cs != '\0') && (*cs != ',')) 3645 cs++; 3646 string = cs; 3647 return (BIG); 3648 3649 default: 3650 bail_out: 3651 (void) fprintf(stderr, gettext( 3652 "mkfs: bad numeric arg for %s: \"%s\"\n"), 3653 param, string); 3654 while ((*cs != '\0') && (*cs != ',')) 3655 cs++; 3656 string = cs; 3657 if (d_value != NO_DEFAULT) { 3658 (void) fprintf(stderr, 3659 gettext("mkfs: %s reset to default %lld\n"), 3660 param, d_value); 3661 return (d_value); 3662 } 3663 lockexit(2); 3664 3665 } 3666 } /* never gets here */ 3667 } 3668 3669 /* match 
************************************************************** */ 3670 /* */ 3671 /* Compare two text strings for equality */ 3672 /* */ 3673 /* Arg: s - pointer to string to match with a command arg */ 3674 /* Global arg: string - pointer to command arg */ 3675 /* */ 3676 /* Return: 1 if match, 0 if no match */ 3677 /* If match, also reset `string' to point to the text */ 3678 /* that follows the matching text. */ 3679 /* */ 3680 /* ******************************************************************** */ 3681 3682 static int 3683 match(char *s) 3684 { 3685 char *cs; 3686 3687 cs = string; 3688 while (*cs++ == *s) { 3689 if (*s++ == '\0') { 3690 goto true; 3691 } 3692 } 3693 if (*s != '\0') { 3694 return (0); 3695 } 3696 3697 true: 3698 cs--; 3699 string = cs; 3700 return (1); 3701 } 3702 3703 /* 3704 * GROWFS ROUTINES 3705 */ 3706 3707 /* ARGSUSED */ 3708 void 3709 lockexit(int exitstatus) 3710 { 3711 if (Pflag) { 3712 /* the probe mode neither changes nor locks the filesystem */ 3713 exit(exitstatus); 3714 } 3715 3716 /* 3717 * flush the dirty cylinder group 3718 */ 3719 if (inlockexit == 0) { 3720 inlockexit = 1; 3721 flcg(); 3722 } 3723 3724 if (aio_inited) { 3725 flush_writes(); 3726 } 3727 3728 /* 3729 * make sure the file system is unlocked before exiting 3730 */ 3731 if ((inlockexit == 1) && (!isbad)) { 3732 inlockexit = 2; 3733 ulockfs(); 3734 /* 3735 * if logging was enabled, then re-enable it 3736 */ 3737 if (waslog) { 3738 if (rl_log_control(fsys, _FIOLOGENABLE) != RL_SUCCESS) { 3739 (void) fprintf(stderr, gettext( 3740 "failed to re-enable logging\n")); 3741 } 3742 } 3743 } else if (grow) { 3744 if (isbad) { 3745 (void) fprintf(stderr, gettext( 3746 "Filesystem is currently inconsistent. It " 3747 "must be repaired with fsck(1M)\nbefore being " 3748 "used. 
Use the following command to " 3749 "do this:\n\n\tfsck %s\n\n"), 3750 fsys); 3751 3752 if (ismounted) { 3753 (void) fprintf(stderr, gettext( 3754 "You will be told that the filesystem " 3755 "is already mounted, and asked if you\n" 3756 "wish to continue. Answer `yes' to " 3757 "this question.\n\n")); 3758 } 3759 3760 (void) fprintf(stderr, gettext( 3761 "One problem should be reported, that " 3762 "the summary information is bad.\n" 3763 "You will then be asked if it " 3764 "should be salvaged. Answer `yes' " 3765 "to\nthis question.\n\n")); 3766 } 3767 3768 if (ismounted) { 3769 /* 3770 * In theory, there's no way to get here without 3771 * isbad also being set, but be robust in the 3772 * face of future code changes. 3773 */ 3774 (void) fprintf(stderr, gettext( 3775 "The filesystem is currently mounted " 3776 "read-only and write-locked. ")); 3777 if (isbad) { 3778 (void) fprintf(stderr, gettext( 3779 "After\nrunning fsck, unlock the " 3780 "filesystem and ")); 3781 } else { 3782 (void) fprintf(stderr, gettext( 3783 "Unlock the filesystem\nand ")); 3784 } 3785 3786 (void) fprintf(stderr, gettext( 3787 "re-enable writing with\nthe following " 3788 "command:\n\n\tlockfs -u %s\n\n"), 3789 directory); 3790 } 3791 } 3792 3793 exit(exitstatus); 3794 } 3795 3796 void 3797 randomgeneration() 3798 { 3799 int i; 3800 struct dinode *dp; 3801 3802 /* 3803 * always perform fsirand(1) function... newfs will notice that 3804 * the inodes have been randomized and will not call fsirand itself 3805 */ 3806 for (i = 0, dp = zino; i < sblock.fs_inopb; ++i, ++dp) 3807 IRANDOMIZE(&dp->di_ic); 3808 } 3809 3810 /* 3811 * Check the size of the summary information. 3812 * Fields in sblock are not changed in this function. 3813 * 3814 * For an 8K filesystem block, the maximum number of cylinder groups is 16384. 
3815 * MAXCSBUFS {32} * 8K {FS block size} 3816 * divided by (sizeof csum) {16} 3817 * 3818 * Note that MAXCSBUFS is not used in the kernel; as of Solaris 2.6 build 32, 3819 * this is the only place where it's referenced. 3820 */ 3821 void 3822 checksummarysize() 3823 { 3824 diskaddr_t dmax; 3825 diskaddr_t dmin; 3826 int64_t cg0frags; 3827 int64_t cg0blocks; 3828 int64_t maxncg; 3829 int64_t maxfrags; 3830 uint64_t fs_size; 3831 uint64_t maxfs_blocks; /* filesystem blocks for max filesystem size */ 3832 3833 /* 3834 * compute the maximum summary info size 3835 */ 3836 dmin = cgdmin(&sblock, 0); 3837 dmax = cgbase(&sblock, 0) + sblock.fs_fpg; 3838 fs_size = (grow) ? grow_fs_size : sblock.fs_size; 3839 if (dmax > fs_size) 3840 dmax = fs_size; 3841 cg0frags = dmax - dmin; 3842 cg0blocks = cg0frags / sblock.fs_frag; 3843 cg0frags = cg0blocks * sblock.fs_frag; 3844 maxncg = (longlong_t)cg0blocks * 3845 (longlong_t)(sblock.fs_bsize / sizeof (struct csum)); 3846 3847 maxfs_blocks = FS_MAX; 3848 3849 if (maxncg > ((longlong_t)maxfs_blocks / (longlong_t)sblock.fs_fpg) + 1) 3850 maxncg = ((longlong_t)maxfs_blocks / 3851 (longlong_t)sblock.fs_fpg) + 1; 3852 3853 maxfrags = maxncg * (longlong_t)sblock.fs_fpg; 3854 3855 if (maxfrags > maxfs_blocks) 3856 maxfrags = maxfs_blocks; 3857 3858 3859 /* 3860 * remember for later processing in extendsummaryinfo() 3861 */ 3862 if (test) 3863 grow_sifrag = dmin + (cg0blocks * sblock.fs_frag); 3864 if (testfrags == 0) 3865 testfrags = cg0frags; 3866 if (testforce) 3867 if (testfrags > cg0frags) { 3868 (void) fprintf(stderr, 3869 gettext("Too many test frags (%lld); " 3870 "try %lld\n"), testfrags, cg0frags); 3871 lockexit(32); 3872 } 3873 3874 /* 3875 * if summary info is too large (too many cg's) tell the user and exit 3876 */ 3877 if ((longlong_t)sblock.fs_size > maxfrags) { 3878 (void) fprintf(stderr, gettext( 3879 "Too many cylinder groups with %llu sectors;\n try " 3880 "increasing cgsize, or decreasing fssize to %llu\n"), 3881 
fsbtodb(&sblock, (uint64_t)sblock.fs_size), 3882 fsbtodb(&sblock, (uint64_t)maxfrags)); 3883 lockexit(32); 3884 } 3885 } 3886 3887 /* 3888 * checksblock() has two uses: 3889 * - One is to sanity test the superblock and is used when newfs(1M) 3890 * is invoked with the "-N" option. If any discrepancy was found, 3891 * just return whatever error was found and do not exit. 3892 * - the other use of it is in places where you expect the superblock 3893 * to be sane, and if it isn't, then we exit. 3894 * Which of the above two actions to take is indicated with the second argument. 3895 */ 3896 3897 int 3898 checksblock(struct fs sb, int proceed) 3899 { 3900 int err = 0; 3901 char *errmsg; 3902 3903 if ((sb.fs_magic != FS_MAGIC) && (sb.fs_magic != MTB_UFS_MAGIC)) { 3904 err = 1; 3905 errmsg = gettext("Bad superblock; magic number wrong\n"); 3906 } else if ((sb.fs_magic == FS_MAGIC && 3907 (sb.fs_version != UFS_EFISTYLE4NONEFI_VERSION_2 && 3908 sb.fs_version != UFS_VERSION_MIN)) || 3909 (sb.fs_magic == MTB_UFS_MAGIC && 3910 (sb.fs_version > MTB_UFS_VERSION_1 || 3911 sb.fs_version < MTB_UFS_VERSION_MIN))) { 3912 err = 2; 3913 errmsg = gettext("Unrecognized version of UFS\n"); 3914 } else if (sb.fs_ncg < 1) { 3915 err = 3; 3916 errmsg = gettext("Bad superblock; ncg out of range\n"); 3917 } else if (sb.fs_cpg < 1) { 3918 err = 4; 3919 errmsg = gettext("Bad superblock; cpg out of range\n"); 3920 } else if (sb.fs_ncg * sb.fs_cpg < sb.fs_ncyl || 3921 (sb.fs_ncg - 1) * sb.fs_cpg >= sb.fs_ncyl) { 3922 err = 5; 3923 errmsg = gettext("Bad superblock; ncyl out of range\n"); 3924 } else if (sb.fs_sbsize <= 0 || sb.fs_sbsize > sb.fs_bsize) { 3925 err = 6; 3926 errmsg = gettext("Bad superblock; superblock size out of range\n"); 3927 } 3928 3929 if (proceed) { 3930 if (err) dprintf(("%s", errmsg)); 3931 return (err); 3932 } 3933 3934 if (err) { 3935 fprintf(stderr, "%s", errmsg); 3936 lockexit(32); 3937 } 3938 return (32); 3939 } 3940 3941 /* 3942 * Roll the embedded log, if any, and set 
up the global variables 3943 * islog, islogok and isufslog. 3944 */ 3945 static void 3946 logsetup(char *devstr) 3947 { 3948 void *buf, *ud_buf; 3949 extent_block_t *ebp; 3950 ml_unit_t *ul; 3951 ml_odunit_t *ud; 3952 3953 /* 3954 * Does the superblock indicate that we are supposed to have a log ? 3955 */ 3956 if (sblock.fs_logbno == 0) { 3957 /* 3958 * No log present, nothing to do. 3959 */ 3960 islogok = 0; 3961 islog = 0; 3962 isufslog = 0; 3963 return; 3964 } else { 3965 /* 3966 * There's a log in a yet unknown state, attempt to roll it. 3967 */ 3968 islog = 1; 3969 islogok = 0; 3970 isufslog = 0; 3971 3972 /* 3973 * We failed to roll the log, bail out. 3974 */ 3975 if (rl_roll_log(devstr) != RL_SUCCESS) 3976 return; 3977 3978 isufslog = 1; 3979 3980 /* log is not okay; check the fs */ 3981 if ((FSOKAY != (sblock.fs_state + sblock.fs_time)) || 3982 (sblock.fs_clean != FSLOG)) 3983 return; 3984 3985 /* get the log allocation block */ 3986 buf = (void *)malloc(DEV_BSIZE); 3987 if (buf == (void *) NULL) 3988 return; 3989 3990 ud_buf = (void *)malloc(DEV_BSIZE); 3991 if (ud_buf == (void *) NULL) { 3992 free(buf); 3993 return; 3994 } 3995 3996 rdfs((diskaddr_t)logbtodb(&sblock, sblock.fs_logbno), 3997 DEV_BSIZE, buf); 3998 ebp = (extent_block_t *)buf; 3999 4000 /* log allocation block is not okay; check the fs */ 4001 if (ebp->type != LUFS_EXTENTS) { 4002 free(buf); 4003 free(ud_buf); 4004 return; 4005 } 4006 4007 /* get the log state block(s) */ 4008 rdfs((diskaddr_t)logbtodb(&sblock, ebp->extents[0].pbno), 4009 DEV_BSIZE, ud_buf); 4010 ud = (ml_odunit_t *)ud_buf; 4011 ul = (ml_unit_t *)malloc(sizeof (*ul)); 4012 ul->un_ondisk = *ud; 4013 4014 /* log state is okay */ 4015 if ((ul->un_chksum == ul->un_head_ident + ul->un_tail_ident) && 4016 (ul->un_version == LUFS_VERSION_LATEST) && 4017 (ul->un_badlog == 0)) 4018 islogok = 1; 4019 free(ud_buf); 4020 free(buf); 4021 free(ul); 4022 } 4023 } 4024 4025 void 4026 growinit(char *devstr) 4027 { 4028 int i; 4029 char 
buf[DEV_BSIZE]; 4030 4031 /* 4032 * Read and verify the superblock 4033 */ 4034 rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock); 4035 (void) checksblock(sblock, 0); 4036 if (sblock.fs_postblformat != FS_DYNAMICPOSTBLFMT) { 4037 (void) fprintf(stderr, 4038 gettext("old file system format; can't growfs\n")); 4039 lockexit(32); 4040 } 4041 4042 /* 4043 * can't shrink a file system 4044 */ 4045 grow_fssize = fsbtodb(&sblock, (uint64_t)sblock.fs_size); 4046 if (fssize_db < grow_fssize) { 4047 (void) fprintf(stderr, 4048 gettext("%lld sectors < current size of %lld sectors\n"), 4049 fssize_db, grow_fssize); 4050 lockexit(32); 4051 } 4052 4053 /* 4054 * can't grow a system to over a terabyte unless it was set up 4055 * as an MTB UFS file system. 4056 */ 4057 if (mtb == 'y' && sblock.fs_magic != MTB_UFS_MAGIC) { 4058 if (fssize_db >= SECTORS_PER_TERABYTE) { 4059 (void) fprintf(stderr, gettext( 4060 "File system was not set up with the multi-terabyte format.\n")); 4061 (void) fprintf(stderr, gettext( 4062 "Its size cannot be increased to a terabyte or more.\n")); 4063 } else { 4064 (void) fprintf(stderr, gettext( 4065 "Cannot convert file system to multi-terabyte format.\n")); 4066 } 4067 lockexit(32); 4068 } 4069 4070 logsetup(devstr); 4071 4072 /* 4073 * can't growfs when logging device has errors 4074 */ 4075 if ((islog && !islogok) || 4076 ((FSOKAY == (sblock.fs_state + sblock.fs_time)) && 4077 (sblock.fs_clean == FSLOG && !islog))) { 4078 (void) fprintf(stderr, 4079 gettext("logging device has errors; can't growfs\n")); 4080 lockexit(32); 4081 } 4082 4083 /* 4084 * disable ufs logging for growing 4085 */ 4086 if (isufslog) { 4087 if (rl_log_control(devstr, _FIOLOGDISABLE) != RL_SUCCESS) { 4088 (void) fprintf(stderr, gettext( 4089 "failed to disable logging\n")); 4090 lockexit(32); 4091 } 4092 islog = 0; 4093 waslog = 1; 4094 } 4095 4096 /* 4097 * if mounted write lock the file system to be grown 4098 */ 4099 if (ismounted) 4100 wlockfs(); 4101 4102 
/* 4103 * refresh dynamic superblock state - disabling logging will have 4104 * changed the amount of free space available in the file system 4105 */ 4106 rdfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock); 4107 4108 /* 4109 * make sure device is big enough 4110 */ 4111 rdfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf); 4112 wtfs((diskaddr_t)fssize_db - 1, DEV_BSIZE, buf); 4113 4114 /* 4115 * read current summary information 4116 */ 4117 grow_fscs = read_summaryinfo(&sblock); 4118 4119 /* 4120 * save some current size related fields from the superblock 4121 * These are used in extendsummaryinfo() 4122 */ 4123 grow_fs_size = sblock.fs_size; 4124 grow_fs_ncg = sblock.fs_ncg; 4125 grow_fs_csaddr = (diskaddr_t)sblock.fs_csaddr; 4126 grow_fs_cssize = sblock.fs_cssize; 4127 4128 /* 4129 * save and reset the clean flag 4130 */ 4131 if (FSOKAY == (sblock.fs_state + sblock.fs_time)) 4132 grow_fs_clean = sblock.fs_clean; 4133 else 4134 grow_fs_clean = FSBAD; 4135 sblock.fs_clean = FSBAD; 4136 sblock.fs_state = FSOKAY - sblock.fs_time; 4137 isbad = 1; 4138 wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock); 4139 } 4140 4141 void 4142 checkdev(char *rdev, char *bdev) 4143 { 4144 struct stat64 statarea; 4145 4146 if (stat64(bdev, &statarea) < 0) { 4147 (void) fprintf(stderr, gettext("can't check mount point; ")); 4148 (void) fprintf(stderr, gettext("can't stat %s\n"), bdev); 4149 lockexit(32); 4150 } 4151 if ((statarea.st_mode & S_IFMT) != S_IFBLK) { 4152 (void) fprintf(stderr, gettext( 4153 "can't check mount point; %s is not a block device\n"), 4154 bdev); 4155 lockexit(32); 4156 } 4157 if (stat64(rdev, &statarea) < 0) { 4158 (void) fprintf(stderr, gettext("can't stat %s\n"), rdev); 4159 lockexit(32); 4160 } 4161 if ((statarea.st_mode & S_IFMT) != S_IFCHR) { 4162 (void) fprintf(stderr, 4163 gettext("%s is not a character device\n"), rdev); 4164 lockexit(32); 4165 } 4166 } 4167 4168 void 4169 checkmount(struct mnttab *mntp, char *bdevname) 4170 { 4171 
struct stat64 statdir; 4172 struct stat64 statdev; 4173 4174 if (strcmp(bdevname, mntp->mnt_special) == 0) { 4175 if (stat64(mntp->mnt_mountp, &statdir) == -1) { 4176 (void) fprintf(stderr, gettext("can't stat %s\n"), 4177 mntp->mnt_mountp); 4178 lockexit(32); 4179 } 4180 if (stat64(mntp->mnt_special, &statdev) == -1) { 4181 (void) fprintf(stderr, gettext("can't stat %s\n"), 4182 mntp->mnt_special); 4183 lockexit(32); 4184 } 4185 if (statdir.st_dev != statdev.st_rdev) { 4186 (void) fprintf(stderr, gettext( 4187 "%s is not mounted on %s; mnttab(4) wrong\n"), 4188 mntp->mnt_special, mntp->mnt_mountp); 4189 lockexit(32); 4190 } 4191 ismounted = 1; 4192 if (directory) { 4193 if (strcmp(mntp->mnt_mountp, directory) != 0) { 4194 (void) fprintf(stderr, 4195 gettext("%s is mounted on %s, not %s\n"), 4196 bdevname, mntp->mnt_mountp, directory); 4197 lockexit(32); 4198 } 4199 } else { 4200 if (grow) 4201 (void) fprintf(stderr, gettext( 4202 "%s is mounted on %s; can't growfs\n"), 4203 bdevname, mntp->mnt_mountp); 4204 else 4205 (void) fprintf(stderr, 4206 gettext("%s is mounted, can't mkfs\n"), 4207 bdevname); 4208 lockexit(32); 4209 } 4210 } 4211 } 4212 4213 struct dinode *dibuf = 0; 4214 diskaddr_t difrag = 0; 4215 4216 struct dinode * 4217 gdinode(ino_t ino) 4218 { 4219 /* 4220 * read the block of inodes containing inode number ino 4221 */ 4222 if (dibuf == 0) 4223 dibuf = (struct dinode *)malloc((unsigned)sblock.fs_bsize); 4224 if (itod(&sblock, ino) != difrag) { 4225 difrag = itod(&sblock, ino); 4226 rdfs(fsbtodb(&sblock, (uint64_t)difrag), (int)sblock.fs_bsize, 4227 (char *)dibuf); 4228 } 4229 return (dibuf + (ino % INOPB(&sblock))); 4230 } 4231 4232 /* 4233 * structure that manages the frags we need for extended summary info 4234 * These frags can be: 4235 * free 4236 * data block 4237 * alloc block 4238 */ 4239 struct csfrag { 4240 struct csfrag *next; /* next entry */ 4241 daddr32_t ofrag; /* old frag */ 4242 daddr32_t nfrag; /* new frag */ 4243 long cylno; /* cylno 
of nfrag */ 4244 long frags; /* number of frags */ 4245 long size; /* size in bytes */ 4246 ino_t ino; /* inode number */ 4247 long fixed; /* Boolean - Already fixed? */ 4248 }; 4249 struct csfrag *csfrag; /* state unknown */ 4250 struct csfrag *csfragino; /* frags belonging to an inode */ 4251 struct csfrag *csfragfree; /* frags that are free */ 4252 4253 daddr32_t maxcsfrag = 0; /* maximum in range */ 4254 daddr32_t mincsfrag = 0x7fffffff; /* minimum in range */ 4255 4256 int 4257 csfraginrange(daddr32_t frag) 4258 { 4259 return ((frag >= mincsfrag) && (frag <= maxcsfrag)); 4260 } 4261 4262 struct csfrag * 4263 findcsfrag(daddr32_t frag, struct csfrag **cfap) 4264 { 4265 struct csfrag *cfp; 4266 4267 if (!csfraginrange(frag)) 4268 return (NULL); 4269 4270 for (cfp = *cfap; cfp; cfp = cfp->next) 4271 if (cfp->ofrag == frag) 4272 return (cfp); 4273 return (NULL); 4274 } 4275 4276 void 4277 checkindirect(ino_t ino, daddr32_t *fragsp, daddr32_t frag, int level) 4278 { 4279 int i; 4280 int ne = sblock.fs_bsize / sizeof (daddr32_t); 4281 daddr32_t fsb[MAXBSIZE / sizeof (daddr32_t)]; 4282 4283 if (frag == 0) 4284 return; 4285 4286 rdfs(fsbtodb(&sblock, frag), (int)sblock.fs_bsize, 4287 (char *)fsb); 4288 4289 checkdirect(ino, fragsp, fsb, sblock.fs_bsize / sizeof (daddr32_t)); 4290 4291 if (level) 4292 for (i = 0; i < ne && *fragsp; ++i) 4293 checkindirect(ino, fragsp, fsb[i], level-1); 4294 } 4295 4296 void 4297 addcsfrag(ino_t ino, daddr32_t frag, struct csfrag **cfap) 4298 { 4299 struct csfrag *cfp, *curr, *prev; 4300 4301 /* 4302 * establish a range for faster checking in csfraginrange() 4303 */ 4304 if (frag > maxcsfrag) 4305 maxcsfrag = frag; 4306 if (frag < mincsfrag) 4307 mincsfrag = frag; 4308 4309 /* 4310 * if this frag belongs to an inode and is not the start of a block 4311 * then see if it is part of a frag range for this inode 4312 */ 4313 if (ino && (frag % sblock.fs_frag)) 4314 for (cfp = *cfap; cfp; cfp = cfp->next) { 4315 if (ino != cfp->ino) 4316 
continue; 4317 if (frag != cfp->ofrag + cfp->frags) 4318 continue; 4319 cfp->frags++; 4320 cfp->size += sblock.fs_fsize; 4321 return; 4322 } 4323 /* 4324 * allocate a csfrag entry and insert it in an increasing order into the 4325 * specified list 4326 */ 4327 cfp = (struct csfrag *)calloc(1, sizeof (struct csfrag)); 4328 cfp->ino = ino; 4329 cfp->ofrag = frag; 4330 cfp->frags = 1; 4331 cfp->size = sblock.fs_fsize; 4332 for (prev = NULL, curr = *cfap; curr != NULL; 4333 prev = curr, curr = curr->next) { 4334 if (frag < curr->ofrag) { 4335 cfp->next = curr; 4336 if (prev) 4337 prev->next = cfp; /* middle element */ 4338 else 4339 *cfap = cfp; /* first element */ 4340 break; 4341 } 4342 if (curr->next == NULL) { 4343 curr->next = cfp; /* last element */ 4344 break; 4345 } 4346 } 4347 if (*cfap == NULL) /* will happen only once */ 4348 *cfap = cfp; 4349 } 4350 4351 void 4352 delcsfrag(daddr32_t frag, struct csfrag **cfap) 4353 { 4354 struct csfrag *cfp; 4355 struct csfrag **cfpp; 4356 4357 /* 4358 * free up entry whose beginning frag matches 4359 */ 4360 for (cfpp = cfap; *cfpp; cfpp = &(*cfpp)->next) { 4361 if (frag == (*cfpp)->ofrag) { 4362 cfp = *cfpp; 4363 *cfpp = (*cfpp)->next; 4364 free((char *)cfp); 4365 return; 4366 } 4367 } 4368 } 4369 4370 /* 4371 * See whether any of the direct blocks in the array pointed by "db" and of 4372 * length "ne" are within the range of frags needed to extend the cylinder 4373 * summary. If so, remove those frags from the "as-yet-unclassified" list 4374 * (csfrag) and add them to the "owned-by-inode" list (csfragino). 4375 * For each such frag found, decrement the frag count pointed to by fragsp. 4376 * "ino" is the inode that contains (either directly or indirectly) the frags 4377 * being checked. 
*/
void
checkdirect(ino_t ino, daddr32_t *fragsp, daddr32_t *db, int ne)
{
	int i;
	int j;
	int found;
	diskaddr_t frag;

	/*
	 * scan for allocation within the new summary info range
	 */
	for (i = 0; i < ne && *fragsp; ++i) {
		if ((frag = *db++) != 0) {
			found = 0;
			/*
			 * walk the frags of this block; once one frag is in
			 * range, every following frag of the block is moved
			 * to csfragino as well
			 */
			for (j = 0; j < sblock.fs_frag && *fragsp; ++j) {
				if (found || (found = csfraginrange(frag))) {
					addcsfrag(ino, frag, &csfragino);
					delcsfrag(frag, &csfrag);
				}
				++frag;
				--(*fragsp);
			}
		}
	}
}

/*
 * Walk the inodes of the old cylinder groups and classify every frag on the
 * csfrag list that turns out to be owned by an inode.  The scan stops early
 * once csfrag is empty.
 */
void
findcsfragino()
{
	int i;
	int j;
	daddr32_t frags;
	struct dinode *dp;

	/*
	 * scan all old inodes looking for allocations in the new
	 * summary info range. Move the affected frag from the
	 * generic csfrag list onto the `owned-by-inode' list csfragino.
	 */
	for (i = UFSROOTINO; i < grow_fs_ncg*sblock.fs_ipg && csfrag; ++i) {
		dp = gdinode((ino_t)i);
		/* only these four inode types are examined */
		switch (dp->di_mode & IFMT) {
			case IFSHAD :
			case IFLNK :
			case IFDIR :
			case IFREG : break;
			default : continue;
		}

		frags = dbtofsb(&sblock, dp->di_blocks);

		/*
		 * one pass over NDADDR+NIADDR entries starting at di_db[0];
		 * presumably di_ib[] directly follows di_db[] in struct
		 * dinode -- TODO confirm against the header
		 */
		checkdirect((ino_t)i, &frags, &dp->di_db[0], NDADDR+NIADDR);
		for (j = 0; j < NIADDR && frags; ++j) {
			/* Negate the block if its an fallocate'd block */
			if (dp->di_ib[j] < 0 && dp->di_ib[j] != UFS_HOLE)
				checkindirect((ino_t)i, &frags,
				    -(dp->di_ib[j]), j);
			else
				checkindirect((ino_t)i, &frags,
				    dp->di_ib[j], j);
		}
	}
}

/*
 * Read the indirect block at "frag", let fixdirect() redirect any of its
 * entries that were moved, then (when "level" is non-zero) recurse into
 * every entry with level-1.  A frag of 0 is ignored.
 */
void
fixindirect(daddr32_t frag, int level)
{
	int i;
	int ne = sblock.fs_bsize / sizeof (daddr32_t);
	daddr32_t fsb[MAXBSIZE / sizeof (daddr32_t)];

	if (frag == 0)
		return;

	rdfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
	    (char *)fsb);

	fixdirect((caddr_t)fsb, frag, fsb, ne);

	/* fsb[] may now hold new frag numbers; recursion follows those */
	if (level)
		for (i = 0; i < ne; ++i)
			fixindirect(fsb[i], level-1);
}

/*
 * Redirect block pointers that refer to moved frags.
 *	bp   - buffer holding the block that contains the pointers
 *	frag - frag address that buffer is written back to
 *	db   - first of "ne" pointers (inside bp) to examine
 * Each pointer found on csfragino is replaced by the entry's nfrag, the
 * entry is marked fixed, and the whole buffer is written back at once.
 */
void
fixdirect(caddr_t bp, daddr32_t frag, daddr32_t *db, int ne)
{
	int i;
	struct csfrag *cfp;

	for (i = 0; i < ne; ++i, ++db) {
		if (*db == 0)
			continue;
		if ((cfp = findcsfrag(*db, &csfragino)) == NULL)
			continue;
		*db = cfp->nfrag;
		cfp->fixed = 1;
		/* the block is rewritten after every single pointer change */
		wtfs(fsbtodb(&sblock, (uint64_t)frag), (int)sblock.fs_bsize,
		    bp);
	}
}

/*
 * For every csfragino entry not yet marked fixed, load the owning inode
 * and redirect its direct and indirect block pointers to the new frags.
 */
void
fixcsfragino()
{
	int i;
	struct dinode *dp;
	struct csfrag *cfp;

	for (cfp = csfragino; cfp; cfp = cfp->next) {
		if (cfp->fixed)
			continue;
		dp = gdinode((ino_t)cfp->ino);
		/* dibuf/difrag describe the inode block gdinode() just cached */
		fixdirect((caddr_t)dibuf, difrag, dp->di_db, NDADDR+NIADDR);
		for (i = 0; i < NIADDR; ++i)
			fixindirect(dp->di_ib[i], i);
	}
}

/*
 * Read the cylinders summary information specified by settings in the
 * passed 'fs' structure into a new allocated array of csum structures.
 * The caller is responsible for freeing the returned array.
 * Return a pointer to an array of csum structures.
 */
static struct csum *
read_summaryinfo(struct fs *fsp)
{
	struct csum *csp;
	int i;

	if ((csp = malloc((size_t)fsp->fs_cssize)) == NULL) {
		(void) fprintf(stderr, gettext("cannot create csum list,"
		    " not enough memory\n"));
		/*
		 * NOTE(review): the other fatal paths in this file leave
		 * through lockexit(); confirm a bare exit() is intended here.
		 */
		exit(32);
	}

	/* read one file system block at a time; the last read may be short */
	for (i = 0; i < fsp->fs_cssize; i += fsp->fs_bsize) {
		rdfs(fsbtodb(fsp,
		    (uint64_t)(fsp->fs_csaddr + numfrags(fsp, i))),
		    (int)(fsp->fs_cssize - i < fsp->fs_bsize ?
		    fsp->fs_cssize - i : fsp->fs_bsize),
		    ((caddr_t)csp) + i);
	}

	return (csp);
}

/*
 * Check the allocation of fragments that are to be made part of a csum block.
 * A fragment is allocated if it is either in the csfragfree list or, it is
 * in the csfragino list and has new frags associated with it.
 * Return the number of allocated fragments.
*/
int64_t
checkfragallocated(daddr32_t frag)
{
	struct csfrag *cfp;
	/*
	 * Since the lists are sorted we can break the search if the asked
	 * frag is smaller than the one in the list.
	 */
	for (cfp = csfragfree; cfp != NULL && frag >= cfp->ofrag;
	    cfp = cfp->next) {
		if (frag == cfp->ofrag)
			return (1);
	}
	for (cfp = csfragino; cfp != NULL && frag >= cfp->ofrag;
	    cfp = cfp->next) {
		if (frag == cfp->ofrag && cfp->nfrag != 0)
			return (cfp->frags);
	}

	return (0);
}

/*
 * Figure out how much the filesystem can be grown. The limiting factor is
 * the available free space needed to extend the cg summary info block.
 * The free space is determined in three steps:
 * - Try to extend the cg summary block to the required size.
 * - Find free blocks in last cg.
 * - Find free space in the last already allocated fragment of the summary info
 *   block, and use it for additional csum structures.
 * Return the maximum size of the new filesystem or 0 if it can't be grown.
 * Please note that this function leaves the global list pointers csfrag,
 * csfragfree, and csfragino initialized, and the caller is responsible for
 * freeing the lists.
 */
diskaddr_t
probe_summaryinfo()
{
	/* fragments by which the csum block can be extended. */
	int64_t growth_csum_frags = 0;
	/* fragments by which the filesystem can be extended. */
	int64_t growth_fs_frags = 0;
	int64_t new_fs_cssize;	/* size of csum blk in the new FS */
	int64_t new_fs_ncg;	/* number of cg in the new FS */
	int64_t spare_csum;
	daddr32_t oldfrag_daddr;
	daddr32_t newfrag_daddr;
	daddr32_t daddr;
	int i;			/* NOTE(review): not referenced below */

	/*
	 * read and verify the superblock
	 */
	rdfs((diskaddr_t)(SBOFF / sectorsize), (int)sbsize, (char *)&sblock);
	(void) checksblock(sblock, 0);

	/*
	 * check how much we can extend the cg summary info block
	 */

	/*
	 * read current summary information
	 */
	fscs = read_summaryinfo(&sblock);

	/*
	 * build list of frags needed for cg summary info block extension
	 */
	oldfrag_daddr = howmany(sblock.fs_cssize, sblock.fs_fsize) +
	    sblock.fs_csaddr;
	new_fs_ncg = howmany(dbtofsb(&sblock, fssize_db), sblock.fs_fpg);
	new_fs_cssize = fragroundup(&sblock, new_fs_ncg * sizeof (struct csum));
	newfrag_daddr = howmany(new_fs_cssize, sblock.fs_fsize) +
	    sblock.fs_csaddr;
	/*
	 * add all of the frags that are required to grow the cyl summary to the
	 * csfrag list, which is the generic/unknown list, since at this point
	 * we don't yet know the state of those frags.
	 */
	for (daddr = oldfrag_daddr; daddr < newfrag_daddr; daddr++)
		addcsfrag((ino_t)0, daddr, &csfrag);

	/*
	 * filter free fragments and allocate them. Note that the free frags
	 * must be allocated first otherwise they could be grabbed by
	 * alloccsfragino() for data frags.
	 */
	findcsfragfree();
	alloccsfragfree();

	/*
	 * filter fragments owned by inodes and allocate them
	 */
	grow_fs_ncg = sblock.fs_ncg; /* findcsfragino() needs this glob. var. */
	findcsfragino();
	alloccsfragino();

	if (notenoughspace()) {
		/*
		 * check how many consecutive fragments could be allocated
		 * in both lists.
		 */
		int64_t tmp_frags;
		for (daddr = oldfrag_daddr; daddr < newfrag_daddr;
		    daddr += tmp_frags) {
			if ((tmp_frags = checkfragallocated(daddr)) > 0)
				growth_csum_frags += tmp_frags;
			else
				break;
		}
	} else {
		/*
		 * We have all we need for the new desired size,
		 * so clean up and report back.
		 */
		return (fssize_db);
	}

	/*
	 * given the number of fragments by which the csum block can be grown
	 * compute by how many new fragments the FS can be increased.
	 * It is the number of csum instances per fragment multiplied by
	 * `growth_csum_frags' and the number of fragments per cylinder group.
	 */
	growth_fs_frags = howmany(sblock.fs_fsize, sizeof (struct csum)) *
	    growth_csum_frags * sblock.fs_fpg;

	/*
	 * compute free fragments in the last cylinder group
	 */
	rdcg(sblock.fs_ncg - 1);
	growth_fs_frags += sblock.fs_fpg - acg.cg_ndblk;

	/*
	 * compute how many csum instances are unused in the old csum block.
	 * For each unused csum instance the FS can be grown by one cylinder
	 * group without extending the csum block.
	 */
	spare_csum = howmany(sblock.fs_cssize, sizeof (struct csum)) -
	    sblock.fs_ncg;
	if (spare_csum > 0)
		growth_fs_frags += spare_csum * sblock.fs_fpg;

	/*
	 * recalculate the new filesystem size in sectors, shorten it by
	 * the requested size `fssize_db' if necessary.
	 */
	if (growth_fs_frags > 0) {
		diskaddr_t sect;
		sect = (sblock.fs_size + growth_fs_frags) * sblock.fs_nspf;
		return ((sect > fssize_db) ? fssize_db : sect);
	}

	return (0);
}

/*
 * Make room for the larger cylinder summary area: collect the frags that
 * follow the old summary area, claim the free ones, relocate the ones owned
 * by inodes (allocate, copy, fix pointers) and release any surplus.
 * Does nothing under -N.  Exits via lockexit() when frags cannot be
 * classified or when there is not enough free space.
 */
void
extendsummaryinfo()
{
	int64_t i;
	int localtest = test;
	int64_t frags;
	daddr32_t oldfrag;
	daddr32_t newfrag;

	/*
	 * if no-write (-N), don't bother
	 */
	if (Nflag)
		return;

again:
	flcg();
	/*
	 * summary info did not change size -- do nothing unless in test mode
	 */
	if (grow_fs_cssize == sblock.fs_cssize)
		if (!localtest)
			return;

	/*
	 * build list of frags needed for additional summary information
	 */
	oldfrag = howmany(grow_fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
	newfrag = howmany(sblock.fs_cssize, sblock.fs_fsize) + grow_fs_csaddr;
	/*
	 * add all of the frags that are required to grow the cyl summary to the
	 * csfrag list, which is the generic/unknown list, since at this point
	 * we don't yet know the state of those frags.
	 */
	for (i = oldfrag, frags = 0; i < newfrag; ++i, ++frags)
		addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
	/*
	 * reduce the number of data blocks in the file system (fs_dsize) by
	 * the number of frags that need to be added to the cyl summary
	 *
	 * NOTE(review): when the test-mode retry below jumps back to
	 * `again', this subtraction is applied a second time -- confirm
	 * that is intended.
	 */
	sblock.fs_dsize -= (newfrag - oldfrag);

	/*
	 * In test mode, we move more data than necessary from
	 * cylinder group 0. The lookup/allocate/move code can be
	 * better stressed without having to create HUGE file systems.
	 */
	if (localtest)
		for (i = newfrag; i < grow_sifrag; ++i) {
			if (frags >= testfrags)
				break;
			frags++;
			addcsfrag((ino_t)0, (diskaddr_t)i, &csfrag);
		}

	/*
	 * move frags to free or inode lists, depending on owner
	 */
	findcsfragfree();
	findcsfragino();

	/*
	 * if not all frags can be located, file system must be inconsistent
	 */
	if (csfrag) {
		isbad = 1;	/* should already be set, but make sure */
		lockexit(32);
	}

	/*
	 * allocate the free frags. Note that the free frags must be allocated
	 * first otherwise they could be grabbed by alloccsfragino() for data
	 * frags.
	 */
	alloccsfragfree();
	/*
	 * allocate extra space for inode frags
	 */
	alloccsfragino();

	/*
	 * not enough space
	 */
	if (notenoughspace()) {
		unalloccsfragfree();
		unalloccsfragino();
		/* in test mode, retry once with only the required frags */
		if (localtest && !testforce) {
			localtest = 0;
			goto again;
		}
		(void) fprintf(stderr, gettext("Not enough free space\n"));
		lockexit(NOTENOUGHSPACE);
	}

	/*
	 * copy the data from old frags to new frags
	 */
	copycsfragino();

	/*
	 * fix the inodes to point to the new frags
	 */
	fixcsfragino();

	/*
	 * We may have moved more frags than we needed. Free them.
	 */
	rdcg((long)0);
	for (i = newfrag; i <= maxcsfrag; ++i)
		setbit(cg_blksfree(&acg), i-cgbase(&sblock, 0));
	wtcg();

	flcg();
}

/*
 * Check if all fragments in the `csfragino' list were reallocated.
 */
int
notenoughspace()
{
	struct csfrag *cfp;

	/*
	 * If any element in the csfragino array has a "new frag location"
	 * of 0, the allocfrags() function was unsuccessful in allocating
	 * space for moving the frag represented by this array element.
	 */
	for (cfp = csfragino; cfp; cfp = cfp->next)
		if (cfp->nfrag == 0)
			return (1);
	return (0);
}

/*
 * Undo alloccsfragino(): release the new location of every csfragino entry
 * that has one and empty the list.
 */
void
unalloccsfragino()
{
	struct csfrag *cfp;

	while ((cfp = csfragino) != NULL) {
		if (cfp->nfrag)
			freefrags(cfp->nfrag, cfp->frags, cfp->cylno);
		delcsfrag(cfp->ofrag, &csfragino);
	}
}

/*
 * Undo alloccsfragfree(): mark every frag on the csfragfree list free again
 * and empty the list.
 */
void
unalloccsfragfree()
{
	struct csfrag *cfp;

	while ((cfp = csfragfree) != NULL) {
		freefrags(cfp->ofrag, cfp->frags, cfp->cylno);
		delcsfrag(cfp->ofrag, &csfragfree);
	}
}

/*
 * For each frag in the "as-yet-unclassified" list (csfrag), see if
 * it's free (i.e., its bit is set in the free frag bit map). If so,
 * move it from the "as-yet-unclassified" list to the csfragfree list.
 */
void
findcsfragfree()
{
	struct csfrag *cfp;
	struct csfrag *cfpnext;

	/*
	 * move free frags onto the free-frag list
	 */
	rdcg((long)0);
	for (cfp = csfrag; cfp; cfp = cfpnext) {
		/* saved first: delcsfrag() below frees cfp */
		cfpnext = cfp->next;
		if (isset(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0))) {
			addcsfrag(cfp->ino, cfp->ofrag, &csfragfree);
			delcsfrag(cfp->ofrag, &csfrag);
		}
	}
}

/*
 * Copy the contents of every csfragino entry ("size" bytes) from its old
 * location (ofrag) to its newly allocated location (nfrag).
 */
void
copycsfragino()
{
	struct csfrag *cfp;
	char buf[MAXBSIZE];

	/*
	 * copy data from old frags to newly allocated frags
	 */
	for (cfp = csfragino; cfp; cfp = cfp->next) {
		rdfs(fsbtodb(&sblock, (uint64_t)cfp->ofrag), (int)cfp->size,
		    buf);
		wtfs(fsbtodb(&sblock, (uint64_t)cfp->nfrag), (int)cfp->size,
		    buf);
	}
}

long curcylno = -1;	/* cylinder group currently held in acg, -1 if none */
int cylnodirty = 0;	/* non-zero when acg must be written back */

/*
 * Make cylinder group "cylno" the one held in acg.  When a different group
 * is cached it is flushed through flcg() before the requested one is read.
 */
void
rdcg(long cylno)
{
	if (cylno != curcylno) {
		flcg();
		curcylno = cylno;
		rdfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
		    (int)sblock.fs_cgsize, (char *)&acg);
	}
}

/*
 * Write the cached cylinder group back if it is marked dirty (after
 * recomputing its allocation totals with resetallocinfo()), then invalidate
 * the cache by setting curcylno to -1.  With both debug and Pflag set, a
 * dirty group is only reported; the function then returns without writing
 * and without invalidating the cache.
 */
void
flcg()
{
	if (cylnodirty) {
		if (debug && Pflag) {
			(void) fprintf(stderr,
			    "Assert: cylnodirty set in probe mode\n");
			return;
		}
		resetallocinfo();
		wtfs(fsbtodb(&sblock, (uint64_t)cgtod(&sblock, curcylno)),
		    (int)sblock.fs_cgsize, (char *)&acg);
		cylnodirty = 0;
	}
	curcylno = -1;
}

/*
 * Mark the cached cylinder group dirty so that flcg() will write it.
 * Has no effect when Pflag is set.
 */
void
wtcg()
{
	if (!Pflag) {
		/* probe mode should never write to disk */
		cylnodirty = 1;
	}
}

/*
 * Look for a free range of at least "frags" frags in the first grow_fs_ncg
 * cylinder groups.  On success the range's free bits are cleared, the group
 * is marked dirty, and the absolute frag number and group number are
 * returned through *fragp and *cylnop.  *fragp is left 0 when no range was
 * found.
 */
void
allocfrags(long frags, daddr32_t *fragp, long *cylnop)
{
	int i;
	int j;
	long bits;
	long bit;

	/*
	 * Allocate a free-frag range in an old cylinder group
	 */
	for (i = 0, *fragp = 0; i < grow_fs_ncg; ++i) {
		/* summary says this group cannot satisfy the request */
		if (((fscs+i)->cs_nffree < frags) && ((fscs+i)->cs_nbfree == 0))
			continue;
		rdcg((long)i);
		bit = bits = 0;
		while (findfreerange(&bit, &bits)) {
			if (frags <= bits) {
				for (j = 0; j < frags; ++j)
					clrbit(cg_blksfree(&acg), bit+j);
				wtcg();
				*cylnop = i;
				*fragp = bit + cgbase(&sblock, i);
				return;
			}
			bit += bits;
		}
	}
}

/*
 * Allocate space for frags that need to be moved in order to free up space for
 * expanding the cylinder summary info.
 * For each frag that needs to be moved (each frag or range of frags in
 * the csfragino list), allocate a new location and store the frag number
 * of that new location in the nfrag field of the csfrag struct.
 * If a new frag can't be allocated for any element in the csfragino list,
 * set the new frag number for that element to 0 and return immediately.
 * The notenoughspace() function will detect this condition.
4966 */ 4967 void 4968 alloccsfragino() 4969 { 4970 struct csfrag *cfp; 4971 4972 /* 4973 * allocate space for inode frag ranges 4974 */ 4975 for (cfp = csfragino; cfp; cfp = cfp->next) { 4976 allocfrags(cfp->frags, &cfp->nfrag, &cfp->cylno); 4977 if (cfp->nfrag == 0) 4978 break; 4979 } 4980 } 4981 4982 void 4983 alloccsfragfree() 4984 { 4985 struct csfrag *cfp; 4986 4987 /* 4988 * allocate the free frags needed for extended summary info 4989 */ 4990 rdcg((long)0); 4991 4992 for (cfp = csfragfree; cfp; cfp = cfp->next) 4993 clrbit(cg_blksfree(&acg), cfp->ofrag - cgbase(&sblock, 0)); 4994 4995 wtcg(); 4996 } 4997 4998 void 4999 freefrags(daddr32_t frag, long frags, long cylno) 5000 { 5001 int i; 5002 5003 /* 5004 * free frags 5005 */ 5006 rdcg(cylno); 5007 for (i = 0; i < frags; ++i) { 5008 setbit(cg_blksfree(&acg), (frag+i) - cgbase(&sblock, cylno)); 5009 } 5010 wtcg(); 5011 } 5012 5013 int 5014 findfreerange(long *bitp, long *bitsp) 5015 { 5016 long bit; 5017 5018 /* 5019 * find a range of free bits in a cylinder group bit map 5020 */ 5021 for (bit = *bitp, *bitsp = 0; bit < acg.cg_ndblk; ++bit) 5022 if (isset(cg_blksfree(&acg), bit)) 5023 break; 5024 5025 if (bit >= acg.cg_ndblk) 5026 return (0); 5027 5028 *bitp = bit; 5029 *bitsp = 1; 5030 for (++bit; bit < acg.cg_ndblk; ++bit, ++(*bitsp)) { 5031 if ((bit % sblock.fs_frag) == 0) 5032 break; 5033 if (isclr(cg_blksfree(&acg), bit)) 5034 break; 5035 } 5036 return (1); 5037 } 5038 5039 void 5040 resetallocinfo() 5041 { 5042 long cno; 5043 long bit; 5044 long bits; 5045 5046 /* 5047 * Compute the free blocks/frags info and update the appropriate 5048 * inmemory superblock, summary info, and cylinder group fields 5049 */ 5050 sblock.fs_cstotal.cs_nffree -= acg.cg_cs.cs_nffree; 5051 sblock.fs_cstotal.cs_nbfree -= acg.cg_cs.cs_nbfree; 5052 5053 acg.cg_cs.cs_nffree = 0; 5054 acg.cg_cs.cs_nbfree = 0; 5055 5056 bzero((caddr_t)acg.cg_frsum, sizeof (acg.cg_frsum)); 5057 bzero((caddr_t)cg_blktot(&acg), 
(int)(acg.cg_iusedoff-acg.cg_btotoff)); 5058 5059 bit = bits = 0; 5060 while (findfreerange(&bit, &bits)) { 5061 if (bits == sblock.fs_frag) { 5062 acg.cg_cs.cs_nbfree++; 5063 cno = cbtocylno(&sblock, bit); 5064 cg_blktot(&acg)[cno]++; 5065 cg_blks(&sblock, &acg, cno)[cbtorpos(&sblock, bit)]++; 5066 } else { 5067 acg.cg_cs.cs_nffree += bits; 5068 acg.cg_frsum[bits]++; 5069 } 5070 bit += bits; 5071 } 5072 5073 *(fscs + acg.cg_cgx) = acg.cg_cs; 5074 5075 sblock.fs_cstotal.cs_nffree += acg.cg_cs.cs_nffree; 5076 sblock.fs_cstotal.cs_nbfree += acg.cg_cs.cs_nbfree; 5077 } 5078 5079 void 5080 extendcg(long cylno) 5081 { 5082 int i; 5083 diskaddr_t dupper; 5084 diskaddr_t cbase; 5085 diskaddr_t dmax; 5086 5087 /* 5088 * extend the cylinder group at the end of the old file system 5089 * if it was partially allocated becase of lack of space 5090 */ 5091 flcg(); 5092 rdcg(cylno); 5093 5094 dupper = acg.cg_ndblk; 5095 if (cylno == sblock.fs_ncg - 1) 5096 acg.cg_ncyl = sblock.fs_ncyl - (sblock.fs_cpg * cylno); 5097 else 5098 acg.cg_ncyl = sblock.fs_cpg; 5099 cbase = cgbase(&sblock, cylno); 5100 dmax = cbase + sblock.fs_fpg; 5101 if (dmax > sblock.fs_size) 5102 dmax = sblock.fs_size; 5103 acg.cg_ndblk = dmax - cbase; 5104 5105 for (i = dupper; i < acg.cg_ndblk; ++i) 5106 setbit(cg_blksfree(&acg), i); 5107 5108 sblock.fs_dsize += (acg.cg_ndblk - dupper); 5109 5110 wtcg(); 5111 flcg(); 5112 } 5113 5114 struct lockfs lockfs; 5115 int lockfd; 5116 int islocked; 5117 int lockfskey; 5118 char lockfscomment[128]; 5119 5120 void 5121 ulockfs() 5122 { 5123 /* 5124 * if the file system was locked, unlock it before exiting 5125 */ 5126 if (islocked == 0) 5127 return; 5128 5129 /* 5130 * first, check if the lock held 5131 */ 5132 lockfs.lf_flags = LOCKFS_MOD; 5133 if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) { 5134 perror(directory); 5135 lockexit(32); 5136 } 5137 5138 if (LOCKFS_IS_MOD(&lockfs)) { 5139 (void) fprintf(stderr, 5140 gettext("FILE SYSTEM CHANGED DURING GROWFS!\n")); 5141 (void) 
fprintf(stderr, 5142 gettext(" See lockfs(1), umount(1), and fsck(1)\n")); 5143 lockexit(32); 5144 } 5145 /* 5146 * unlock the file system 5147 */ 5148 lockfs.lf_lock = LOCKFS_ULOCK; 5149 lockfs.lf_flags = 0; 5150 lockfs.lf_key = lockfskey; 5151 clockfs(); 5152 if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) { 5153 perror(directory); 5154 lockexit(32); 5155 } 5156 } 5157 5158 void 5159 wlockfs() 5160 { 5161 5162 /* 5163 * if no-write (-N), don't bother 5164 */ 5165 if (Nflag) 5166 return; 5167 /* 5168 * open the mountpoint, and write lock the file system 5169 */ 5170 if ((lockfd = open64(directory, O_RDONLY)) == -1) { 5171 perror(directory); 5172 lockexit(32); 5173 } 5174 5175 /* 5176 * check if it is already locked 5177 */ 5178 if (ioctl(lockfd, _FIOLFSS, &lockfs) == -1) { 5179 perror(directory); 5180 lockexit(32); 5181 } 5182 5183 if (lockfs.lf_lock != LOCKFS_WLOCK) { 5184 lockfs.lf_lock = LOCKFS_WLOCK; 5185 lockfs.lf_flags = 0; 5186 lockfs.lf_key = 0; 5187 clockfs(); 5188 if (ioctl(lockfd, _FIOLFS, &lockfs) == -1) { 5189 perror(directory); 5190 lockexit(32); 5191 } 5192 } 5193 islocked = 1; 5194 lockfskey = lockfs.lf_key; 5195 } 5196 5197 void 5198 clockfs() 5199 { 5200 time_t t; 5201 char *ct; 5202 5203 (void) time(&t); 5204 ct = ctime(&t); 5205 ct[strlen(ct)-1] = '\0'; 5206 5207 (void) sprintf(lockfscomment, "%s -- mkfs pid %d", ct, getpid()); 5208 lockfs.lf_comlen = strlen(lockfscomment)+1; 5209 lockfs.lf_comment = lockfscomment; 5210 } 5211 5212 /* 5213 * Write the csum records and the superblock 5214 */ 5215 void 5216 wtsb() 5217 { 5218 long i; 5219 5220 /* 5221 * write summary information 5222 */ 5223 for (i = 0; i < sblock.fs_cssize; i += sblock.fs_bsize) 5224 wtfs(fsbtodb(&sblock, (uint64_t)(sblock.fs_csaddr + 5225 numfrags(&sblock, i))), 5226 (int)(sblock.fs_cssize - i < sblock.fs_bsize ? 
5227 sblock.fs_cssize - i : sblock.fs_bsize), 5228 ((char *)fscs) + i); 5229 5230 /* 5231 * write superblock 5232 */ 5233 sblock.fs_time = mkfstime; 5234 wtfs((diskaddr_t)(SBOFF / sectorsize), sbsize, (char *)&sblock); 5235 } 5236 5237 /* 5238 * Verify that the optimization selection is reasonable, and advance 5239 * the global "string" appropriately. 5240 */ 5241 static char 5242 checkopt(char *optim) 5243 { 5244 char opt; 5245 int limit = strcspn(optim, ","); 5246 5247 switch (limit) { 5248 case 0: /* missing indicator (have comma or nul) */ 5249 (void) fprintf(stderr, gettext( 5250 "mkfs: missing optimization flag reset to `t' (time)\n")); 5251 opt = 't'; 5252 break; 5253 5254 case 1: /* single-character indicator */ 5255 opt = *optim; 5256 if ((opt != 's') && (opt != 't')) { 5257 (void) fprintf(stderr, gettext( 5258 "mkfs: bad optimization value `%c' reset to `t' (time)\n"), 5259 opt); 5260 opt = 't'; 5261 } 5262 break; 5263 5264 default: /* multi-character indicator */ 5265 (void) fprintf(stderr, gettext( 5266 "mkfs: bad optimization value `%*.*s' reset to `t' (time)\n"), 5267 limit, limit, optim); 5268 opt = 't'; 5269 break; 5270 } 5271 5272 string += limit; 5273 5274 return (opt); 5275 } 5276 5277 /* 5278 * Verify that the mtb selection is reasonable, and advance 5279 * the global "string" appropriately. 
5280 */ 5281 static char 5282 checkmtb(char *mtbarg) 5283 { 5284 char mtbc; 5285 int limit = strcspn(mtbarg, ","); 5286 5287 switch (limit) { 5288 case 0: /* missing indicator (have comma or nul) */ 5289 (void) fprintf(stderr, gettext( 5290 "mkfs: missing mtb flag reset to `n' (no mtb support)\n")); 5291 mtbc = 'n'; 5292 break; 5293 5294 case 1: /* single-character indicator */ 5295 mtbc = tolower(*mtbarg); 5296 if ((mtbc != 'y') && (mtbc != 'n')) { 5297 (void) fprintf(stderr, gettext( 5298 "mkfs: bad mtb value `%c' reset to `n' (no mtb support)\n"), 5299 mtbc); 5300 mtbc = 'n'; 5301 } 5302 break; 5303 5304 default: /* multi-character indicator */ 5305 (void) fprintf(stderr, gettext( 5306 "mkfs: bad mtb value `%*.*s' reset to `n' (no mtb support)\n"), 5307 limit, limit, mtbarg); 5308 opt = 'n'; 5309 break; 5310 } 5311 5312 string += limit; 5313 5314 return (mtbc); 5315 } 5316 5317 /* 5318 * Verify that a value is in a range. If it is not, resets it to 5319 * its default value if one is supplied, exits otherwise. 5320 * 5321 * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL. 5322 */ 5323 static void 5324 range_check(long *varp, char *name, long minimum, long maximum, 5325 long def_val, int user_supplied) 5326 { 5327 dprintf(("DeBuG %s : %ld (%ld %ld %ld)\n", 5328 name, *varp, minimum, maximum, def_val)); 5329 5330 if ((*varp < minimum) || (*varp > maximum)) { 5331 if (user_supplied != RC_DEFAULT) { 5332 (void) fprintf(stderr, gettext( 5333 "mkfs: bad value for %s: %ld must be between %ld and %ld\n"), 5334 name, *varp, minimum, maximum); 5335 } 5336 if (def_val != NO_DEFAULT) { 5337 if (user_supplied) { 5338 (void) fprintf(stderr, 5339 gettext("mkfs: %s reset to default %ld\n"), 5340 name, def_val); 5341 } 5342 *varp = def_val; 5343 dprintf(("DeBuG %s : %ld\n", name, *varp)); 5344 return; 5345 } 5346 lockexit(2); 5347 /*NOTREACHED*/ 5348 } 5349 } 5350 5351 /* 5352 * Verify that a value is in a range. 
If it is not, resets it to 5353 * its default value if one is supplied, exits otherwise. 5354 * 5355 * When testing, can compare user_supplied to RC_KEYWORD or RC_POSITIONAL. 5356 */ 5357 static void 5358 range_check_64(uint64_t *varp, char *name, uint64_t minimum, uint64_t maximum, 5359 uint64_t def_val, int user_supplied) 5360 { 5361 if ((*varp < minimum) || (*varp > maximum)) { 5362 if (user_supplied != RC_DEFAULT) { 5363 (void) fprintf(stderr, gettext( 5364 "mkfs: bad value for %s: %lld must be between %lld and %lld\n"), 5365 name, *varp, minimum, maximum); 5366 } 5367 if (def_val != NO_DEFAULT) { 5368 if (user_supplied) { 5369 (void) fprintf(stderr, 5370 gettext("mkfs: %s reset to default %lld\n"), 5371 name, def_val); 5372 } 5373 *varp = def_val; 5374 return; 5375 } 5376 lockexit(2); 5377 /*NOTREACHED*/ 5378 } 5379 } 5380 5381 /* 5382 * Blocks SIGINT from delivery. Returns the previous mask in the 5383 * buffer provided, so that mask may be later restored. 5384 */ 5385 static void 5386 block_sigint(sigset_t *old_mask) 5387 { 5388 sigset_t block_mask; 5389 5390 if (sigemptyset(&block_mask) < 0) { 5391 fprintf(stderr, gettext("Could not clear signal mask\n")); 5392 lockexit(3); 5393 } 5394 if (sigaddset(&block_mask, SIGINT) < 0) { 5395 fprintf(stderr, gettext("Could not set signal mask\n")); 5396 lockexit(3); 5397 } 5398 if (sigprocmask(SIG_BLOCK, &block_mask, old_mask) < 0) { 5399 fprintf(stderr, gettext("Could not block SIGINT\n")); 5400 lockexit(3); 5401 } 5402 } 5403 5404 /* 5405 * Restores the signal mask that was in force before a call 5406 * to block_sigint(). This may actually still have SIGINT blocked, 5407 * if we've been recursively invoked. 
5408 */ 5409 static void 5410 unblock_sigint(sigset_t *old_mask) 5411 { 5412 if (sigprocmask(SIG_UNBLOCK, old_mask, (sigset_t *)NULL) < 0) { 5413 fprintf(stderr, gettext("Could not restore signal mask\n")); 5414 lockexit(3); 5415 } 5416 } 5417 5418 /* 5419 * Attempt to be somewhat graceful about being interrupted, rather than 5420 * just silently leaving the filesystem in an unusable state. 5421 * 5422 * The kernel has blocked SIGINT upon entry, so we don't have to worry 5423 * about recursion if the user starts pounding on the keyboard. 5424 */ 5425 static void 5426 recover_from_sigint(int signum) 5427 { 5428 if (fso > -1) { 5429 if ((Nflag != 0) || confirm_abort()) { 5430 lockexit(4); 5431 } 5432 } 5433 } 5434 5435 static int 5436 confirm_abort(void) 5437 { 5438 char line[80]; 5439 5440 printf(gettext("\n\nAborting at this point will leave the filesystem " 5441 "in an inconsistent\nstate. If you do choose to stop, " 5442 "you will be given instructions on how to\nrecover " 5443 "the filesystem. 
Do you wish to cancel the filesystem " 5444 "grow\noperation (y/n)?")); 5445 if (getline(stdin, line, sizeof (line)) == EOF) 5446 line[0] = 'y'; 5447 5448 printf("\n"); 5449 if (line[0] == 'y' || line[0] == 'Y') 5450 return (1); 5451 else { 5452 return (0); 5453 } 5454 } 5455 5456 static int 5457 getline(FILE *fp, char *loc, int maxlen) 5458 { 5459 int n; 5460 char *p, *lastloc; 5461 5462 p = loc; 5463 lastloc = &p[maxlen-1]; 5464 while ((n = getc(fp)) != '\n') { 5465 if (n == EOF) 5466 return (EOF); 5467 if (!isspace(n) && p < lastloc) 5468 *p++ = n; 5469 } 5470 *p = 0; 5471 return (p - loc); 5472 } 5473 5474 /* 5475 * Calculate the maximum value of cylinders-per-group for a file 5476 * system with the characteristics: 5477 * 5478 * bsize - file system block size 5479 * fragsize - frag size 5480 * nbpi - number of bytes of disk space per inode 5481 * nrpos - number of rotational positions 5482 * spc - sectors per cylinder 5483 * 5484 * These five characteristic are not adjustable (by this function). 5485 * The only attribute of the file system which IS adjusted by this 5486 * function in order to maximize cylinders-per-group is the proportion 5487 * of the cylinder group overhead block used for the inode map. The 5488 * inode map cannot occupy more than one-third of the cylinder group 5489 * overhead block, but it's OK for it to occupy less than one-third 5490 * of the overhead block. 5491 * 5492 * The setting of nbpi determines one possible value for the maximum 5493 * size of a cylinder group. It does so because it determines the total 5494 * number of inodes in the file system (file system size is fixed, and 5495 * nbpi is fixed, so the total number of inodes is fixed too). The 5496 * cylinder group has to be small enough so that the number of inodes 5497 * in the cylinder group is less than or equal to the number of bits 5498 * in one-third (or whatever proportion is assumed) of a file system 5499 * block. 
The details of the calculation are: 5500 * 5501 * The macro MAXIpG_B(bsize, inode_divisor) determines the maximum 5502 * number of inodes that can be in a cylinder group, given the 5503 * proportion of the cylinder group overhead block used for the 5504 * inode bitmaps (an inode_divisor of 3 means that 1/3 of the 5505 * block is used for inode bitmaps; an inode_divisor of 12 means 5506 * that 1/12 of the block is used for inode bitmaps.) 5507 * 5508 * Once the number of inodes per cylinder group is known, the 5509 * maximum value of cylinders-per-group (determined by nbpi) 5510 * is calculated by the formula 5511 * 5512 * maxcpg_given_nbpi = (size of a cylinder group)/(size of a cylinder) 5513 * 5514 * = (inodes-per-cg * nbpi)/(spc * DEV_BSIZE) 5515 * 5516 * (Interestingly, the size of the file system never enters 5517 * into this calculation.) 5518 * 5519 * Another possible value for the maximum cylinder group size is determined 5520 * by frag_size and nrpos. The frags in the cylinder group must be 5521 * representable in the frag bitmaps in the cylinder overhead block and the 5522 * rotational positions for each cylinder must be represented in the 5523 * rotational position tables. 
The calculation of the maximum cpg
 * value, given the frag and nrpos values, is:
 *
 *     maxcpg_given_fragsize =
 *	  (available space in the overhead block) / (size of per-cylinder data)
 *
 *     The available space in the overhead block =
 *	  bsize - sizeof (struct cg) - space_used_for_inode_bitmaps
 *
 *     The size of the per-cylinder data is:
 *	    sizeof(long)            # for the "blocks avail per cylinder" field
 *	    + nrpos * sizeof(short)   # for the rotational position table entry
 *	    + frags-per-cylinder/NBBY # number of bytes to represent this
 *				      # cylinder in the frag bitmap
 *
 * The two calculated maximum values of cylinder-per-group will typically
 * turn out to be different, since they are derived from two different
 * constraints.  Usually, maxcpg_given_nbpi is much bigger than
 * maxcpg_given_fragsize.  But they can be brought together by
 * adjusting the proportion of the overhead block dedicated to
 * the inode bitmaps.  Decreasing the proportion of the cylinder
 * group overhead block used for inode maps will decrease
 * maxcpg_given_nbpi and increase maxcpg_given_fragsize.
 *
 * This function calculates the initial values of maxcpg_given_nbpi
 * and maxcpg_given_fragsize assuming that 1/3 of the cg overhead
 * block is used for inode bitmaps.  Then it decreases the proportion
 * of the cg overhead block used for inode bitmaps (by increasing
 * the value of inode_divisor) until maxcpg_given_nbpi and
 * maxcpg_given_fragsize are the same, or stop changing, or
 * maxcpg_given_nbpi is less than maxcpg_given_fragsize.
5554 * 5555 * The loop terminates when any of the following occur: 5556 * * maxcpg_given_fragsize is greater than or equal to 5557 * maxcpg_given_nbpi 5558 * * neither maxcpg_given_fragsize nor maxcpg_given_nbpi 5559 * change in the expected direction 5560 * 5561 * The loop is guaranteed to terminate because it only continues 5562 * while maxcpg_given_fragsize and maxcpg_given_nbpi are approaching 5563 * each other. As soon they cross each other, or neither one changes 5564 * in the direction of the other, or one of them moves in the wrong 5565 * direction, the loop completes. 5566 */ 5567 5568 static long 5569 compute_maxcpg(long bsize, long fragsize, long nbpi, long nrpos, long spc) 5570 { 5571 int maxcpg_given_nbpi; /* in cylinders */ 5572 int maxcpg_given_fragsize; /* in cylinders */ 5573 int spf; /* sectors per frag */ 5574 int inode_divisor; 5575 int old_max_given_frag = 0; 5576 int old_max_given_nbpi = INT_MAX; 5577 5578 spf = fragsize / DEV_BSIZE; 5579 inode_divisor = 3; 5580 5581 while (1) { 5582 maxcpg_given_nbpi = 5583 (((int64_t)(MAXIpG_B(bsize, inode_divisor))) * nbpi) / 5584 (DEV_BSIZE * ((int64_t)spc)); 5585 maxcpg_given_fragsize = 5586 (bsize - (sizeof (struct cg)) - (bsize / inode_divisor)) / 5587 (sizeof (long) + nrpos * sizeof (short) + 5588 (spc / spf) / NBBY); 5589 5590 if (maxcpg_given_fragsize >= maxcpg_given_nbpi) 5591 return (maxcpg_given_nbpi); 5592 5593 /* 5594 * If neither value moves toward the other, return the 5595 * least of the old values (we use the old instead of the 5596 * new because: if the old is the same as the new, it 5597 * doesn't matter which ones we use. If one of the 5598 * values changed, but in the wrong direction, the 5599 * new values are suspect. Better use the old. This 5600 * shouldn't happen, but it's best to check. 
5601 */ 5602 5603 if (!(maxcpg_given_nbpi < old_max_given_nbpi) && 5604 !(maxcpg_given_fragsize > old_max_given_frag)) 5605 return (MIN(old_max_given_nbpi, old_max_given_frag)); 5606 5607 /* 5608 * This is probably impossible, but if one of the maxcpg 5609 * values moved in the "right" direction and one moved 5610 * in the "wrong" direction (that is, the two values moved 5611 * in the same direction), the previous conditional won't 5612 * recognize that the values aren't converging (since at 5613 * least one value moved in the "right" direction, the 5614 * last conditional says "keep going"). 5615 * 5616 * Just to make absolutely certain that the loop terminates, 5617 * check for one of the values moving in the "wrong" direction 5618 * and terminate the loop if it happens. 5619 */ 5620 5621 if (maxcpg_given_nbpi > old_max_given_nbpi || 5622 maxcpg_given_fragsize < old_max_given_frag) 5623 return (MIN(old_max_given_nbpi, old_max_given_frag)); 5624 5625 old_max_given_nbpi = maxcpg_given_nbpi; 5626 old_max_given_frag = maxcpg_given_fragsize; 5627 5628 inode_divisor++; 5629 } 5630 } 5631 5632 static int 5633 in_64bit_mode(void) 5634 { 5635 /* cmd must be an absolute path, for security */ 5636 char *cmd = "/usr/bin/isainfo -b"; 5637 char buf[BUFSIZ]; 5638 FILE *ptr; 5639 int retval = 0; 5640 5641 putenv("IFS= \t"); 5642 if ((ptr = popen(cmd, "r")) != NULL) { 5643 if (fgets(buf, BUFSIZ, ptr) != NULL && 5644 strncmp(buf, "64", 2) == 0) 5645 retval = 1; 5646 (void) pclose(ptr); 5647 } 5648 return (retval); 5649 } 5650 5651 /* 5652 * validate_size 5653 * 5654 * Return 1 if the device appears to be at least "size" sectors long. 5655 * Return 0 if it's shorter or we can't read it. 
5656 */ 5657 5658 static int 5659 validate_size(int fd, diskaddr_t size) 5660 { 5661 char buf[DEV_BSIZE]; 5662 int rc; 5663 5664 if ((llseek(fd, (offset_t)((size - 1) * DEV_BSIZE), SEEK_SET) == -1) || 5665 (read(fd, buf, DEV_BSIZE)) != DEV_BSIZE) 5666 rc = 0; 5667 else 5668 rc = 1; 5669 return (rc); 5670 } 5671 5672 /* 5673 * Print every field of the calculated superblock, along with 5674 * its value. To make parsing easier on the caller, the value 5675 * is printed first, then the name. Additionally, there's only 5676 * one name/value pair per line. All values are reported in 5677 * hexadecimal (with the traditional 0x prefix), as that's slightly 5678 * easier for humans to read. Not that they're expected to, but 5679 * debugging happens. 5680 */ 5681 static void 5682 dump_sblock(void) 5683 { 5684 int row, column, pending, written; 5685 caddr_t source; 5686 5687 if (Rflag) { 5688 pending = sizeof (sblock); 5689 source = (caddr_t)&sblock; 5690 do { 5691 written = write(fileno(stdout), source, pending); 5692 pending -= written; 5693 source += written; 5694 } while ((pending > 0) && (written > 0)); 5695 5696 if (written < 0) { 5697 perror(gettext("Binary dump of superblock failed")); 5698 lockexit(1); 5699 } 5700 return; 5701 } else { 5702 printf("0x%x sblock.fs_link\n", sblock.fs_link); 5703 printf("0x%x sblock.fs_rolled\n", sblock.fs_rolled); 5704 printf("0x%x sblock.fs_sblkno\n", sblock.fs_sblkno); 5705 printf("0x%x sblock.fs_cblkno\n", sblock.fs_cblkno); 5706 printf("0x%x sblock.fs_iblkno\n", sblock.fs_iblkno); 5707 printf("0x%x sblock.fs_dblkno\n", sblock.fs_dblkno); 5708 printf("0x%x sblock.fs_cgoffset\n", sblock.fs_cgoffset); 5709 printf("0x%x sblock.fs_cgmask\n", sblock.fs_cgmask); 5710 printf("0x%x sblock.fs_time\n", sblock.fs_time); 5711 printf("0x%x sblock.fs_size\n", sblock.fs_size); 5712 printf("0x%x sblock.fs_dsize\n", sblock.fs_dsize); 5713 printf("0x%x sblock.fs_ncg\n", sblock.fs_ncg); 5714 printf("0x%x sblock.fs_bsize\n", sblock.fs_bsize); 5715 
printf("0x%x sblock.fs_fsize\n", sblock.fs_fsize); 5716 printf("0x%x sblock.fs_frag\n", sblock.fs_frag); 5717 printf("0x%x sblock.fs_minfree\n", sblock.fs_minfree); 5718 printf("0x%x sblock.fs_rotdelay\n", sblock.fs_rotdelay); 5719 printf("0x%x sblock.fs_rps\n", sblock.fs_rps); 5720 printf("0x%x sblock.fs_bmask\n", sblock.fs_bmask); 5721 printf("0x%x sblock.fs_fmask\n", sblock.fs_fmask); 5722 printf("0x%x sblock.fs_bshift\n", sblock.fs_bshift); 5723 printf("0x%x sblock.fs_fshift\n", sblock.fs_fshift); 5724 printf("0x%x sblock.fs_maxcontig\n", sblock.fs_maxcontig); 5725 printf("0x%x sblock.fs_maxbpg\n", sblock.fs_maxbpg); 5726 printf("0x%x sblock.fs_fragshift\n", sblock.fs_fragshift); 5727 printf("0x%x sblock.fs_fsbtodb\n", sblock.fs_fsbtodb); 5728 printf("0x%x sblock.fs_sbsize\n", sblock.fs_sbsize); 5729 printf("0x%x sblock.fs_csmask\n", sblock.fs_csmask); 5730 printf("0x%x sblock.fs_csshift\n", sblock.fs_csshift); 5731 printf("0x%x sblock.fs_nindir\n", sblock.fs_nindir); 5732 printf("0x%x sblock.fs_inopb\n", sblock.fs_inopb); 5733 printf("0x%x sblock.fs_nspf\n", sblock.fs_nspf); 5734 printf("0x%x sblock.fs_optim\n", sblock.fs_optim); 5735 #ifdef _LITTLE_ENDIAN 5736 printf("0x%x sblock.fs_state\n", sblock.fs_state); 5737 #else 5738 printf("0x%x sblock.fs_npsect\n", sblock.fs_npsect); 5739 #endif 5740 printf("0x%x sblock.fs_si\n", sblock.fs_si); 5741 printf("0x%x sblock.fs_trackskew\n", sblock.fs_trackskew); 5742 printf("0x%x sblock.fs_id[0]\n", sblock.fs_id[0]); 5743 printf("0x%x sblock.fs_id[1]\n", sblock.fs_id[1]); 5744 printf("0x%x sblock.fs_csaddr\n", sblock.fs_csaddr); 5745 printf("0x%x sblock.fs_cssize\n", sblock.fs_cssize); 5746 printf("0x%x sblock.fs_cgsize\n", sblock.fs_cgsize); 5747 printf("0x%x sblock.fs_ntrak\n", sblock.fs_ntrak); 5748 printf("0x%x sblock.fs_nsect\n", sblock.fs_nsect); 5749 printf("0x%x sblock.fs_spc\n", sblock.fs_spc); 5750 printf("0x%x sblock.fs_ncyl\n", sblock.fs_ncyl); 5751 printf("0x%x sblock.fs_cpg\n", sblock.fs_cpg); 5752 
printf("0x%x sblock.fs_ipg\n", sblock.fs_ipg); 5753 printf("0x%x sblock.fs_fpg\n", sblock.fs_fpg); 5754 printf("0x%x sblock.fs_cstotal\n", sblock.fs_cstotal); 5755 printf("0x%x sblock.fs_fmod\n", sblock.fs_fmod); 5756 printf("0x%x sblock.fs_clean\n", sblock.fs_clean); 5757 printf("0x%x sblock.fs_ronly\n", sblock.fs_ronly); 5758 printf("0x%x sblock.fs_flags\n", sblock.fs_flags); 5759 printf("0x%x sblock.fs_fsmnt\n", sblock.fs_fsmnt); 5760 printf("0x%x sblock.fs_cgrotor\n", sblock.fs_cgrotor); 5761 printf("0x%x sblock.fs_u.fs_csp\n", sblock.fs_u.fs_csp); 5762 printf("0x%x sblock.fs_cpc\n", sblock.fs_cpc); 5763 5764 /* 5765 * No macros are defined for the dimensions of the 5766 * opostbl array. 5767 */ 5768 for (row = 0; row < 16; row++) { 5769 for (column = 0; column < 8; column++) { 5770 printf("0x%x sblock.fs_opostbl[%d][%d]\n", 5771 sblock.fs_opostbl[row][column], 5772 row, column); 5773 } 5774 } 5775 5776 /* 5777 * Ditto the size of sparecon. 5778 */ 5779 for (row = 0; row < 51; row++) { 5780 printf("0x%x sblock.fs_sparecon[%d]\n", 5781 sblock.fs_sparecon[row], row); 5782 } 5783 5784 printf("0x%x sblock.fs_version\n", sblock.fs_version); 5785 printf("0x%x sblock.fs_logbno\n", sblock.fs_logbno); 5786 printf("0x%x sblock.fs_reclaim\n", sblock.fs_reclaim); 5787 printf("0x%x sblock.fs_sparecon2\n", sblock.fs_sparecon2); 5788 #ifdef _LITTLE_ENDIAN 5789 printf("0x%x sblock.fs_npsect\n", sblock.fs_npsect); 5790 #else 5791 printf("0x%x sblock.fs_state\n", sblock.fs_state); 5792 #endif 5793 printf("0x%llx sblock.fs_qbmask\n", sblock.fs_qbmask); 5794 printf("0x%llx sblock.fs_qfmask\n", sblock.fs_qfmask); 5795 printf("0x%x sblock.fs_postblformat\n", sblock.fs_postblformat); 5796 printf("0x%x sblock.fs_nrpos\n", sblock.fs_nrpos); 5797 printf("0x%x sblock.fs_postbloff\n", sblock.fs_postbloff); 5798 printf("0x%x sblock.fs_rotbloff\n", sblock.fs_rotbloff); 5799 printf("0x%x sblock.fs_magic\n", sblock.fs_magic); 5800 5801 /* 5802 * fs_space isn't of much use in this context, so 
we'll 5803 * just ignore it for now. 5804 */ 5805 } 5806 } 5807