1 /* 2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/param.h> 31 #include <sys/types.h> 32 #include <sys/sysmacros.h> 33 #include <sys/mntent.h> 34 #include <string.h> 35 #include <stdarg.h> 36 #include <sys/fs/ufs_fs.h> 37 #include <sys/vnode.h> 38 #include <sys/fs/ufs_inode.h> 39 #define _KERNEL 40 #include <sys/fs/ufs_fsdir.h> 41 #undef _KERNEL 42 #include "fsck.h" 43 44 struct rc_queue { 45 struct rc_queue *rc_next; 46 fsck_ino_t rc_orphan; 47 fsck_ino_t rc_parent; 48 caddr_t rc_name; 49 }; 50 51 caddr_t lfname = "lost+found"; /* name to use for l+f dir */ 52 static int lfmode = 01700; /* mode to use when creating l+f dir */ 53 static struct dirtemplate emptydir = { 0, DIRBLKSIZ }; 54 static struct dirtemplate dirhead = { 55 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." 56 }; 57 58 static void lftempname(char *, fsck_ino_t); 59 static int do_reconnect(fsck_ino_t, fsck_ino_t, caddr_t); 60 static caddr_t mkuniqname(caddr_t, caddr_t, fsck_ino_t, fsck_ino_t); 61 static int chgino(struct inodesc *); 62 static int dircheck(struct inodesc *, struct direct *); 63 static int expanddir(fsck_ino_t, char *); 64 static void freedir(fsck_ino_t, fsck_ino_t); 65 static struct direct *fsck_readdir(struct inodesc *); 66 static struct bufarea *getdirblk(daddr32_t, size_t); 67 static int mkentry(struct inodesc *); 68 static fsck_ino_t newdir(fsck_ino_t, fsck_ino_t, int, caddr_t); 69 static fsck_ino_t reallocdir(fsck_ino_t, fsck_ino_t, int, caddr_t); 70 71 /* 72 * Propagate connected state through the tree. 73 */ 74 void 75 propagate(void) 76 { 77 struct inoinfo **inpp, *inp; 78 struct inoinfo **inpend; 79 int change, inorphan; 80 81 inpend = &inpsort[inplast]; 82 do { 83 change = 0; 84 for (inpp = inpsort; inpp < inpend; inpp++) { 85 inp = *inpp; 86 if (inp->i_parent == 0) 87 continue; 88 if (statemap[inp->i_parent] == DFOUND && 89 INO_IS_DUNFOUND(inp->i_number)) { 90 inorphan = statemap[inp->i_number] & INORPHAN; 91 statemap[inp->i_number] = DFOUND | inorphan; 92 change++; 93 } 94 } 95 } while (change > 0); 96 } 97 98 /* 99 * Scan each entry in a directory block. 100 */ 101 int 102 dirscan(struct inodesc *idesc) 103 { 104 struct direct *dp; 105 struct bufarea *bp; 106 uint_t dsize, n; 107 size_t blksiz; 108 union { /* keep lint happy about alignment */ 109 char dbuf[DIRBLKSIZ]; 110 struct direct dir; 111 } u; 112 113 if (idesc->id_type != DATA) 114 errexit("wrong type to dirscan %d\n", idesc->id_type); 115 if (idesc->id_entryno == 0 && 116 (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) 117 idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); 118 blksiz = idesc->id_numfrags * sblock.fs_fsize; 119 if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { 120 idesc->id_filesize -= (offset_t)blksiz; 121 return (SKIP); 122 } 123 idesc->id_loc = 0; 124 for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { 125 /* 126 * If we were just passed a corrupt directory entry with 127 * d_reclen > DIRBLKSIZ, we don't want to memmove() all over 128 * our stack. This directory gets cleaned up later. 129 */ 130 dsize = MIN(dp->d_reclen, sizeof (u.dbuf)); 131 (void) memmove((void *)u.dbuf, (void *)dp, (size_t)dsize); 132 idesc->id_dirp = &u.dir; 133 if ((n = (*idesc->id_func)(idesc)) & ALTERED) { 134 /* 135 * We can ignore errors from getdirblk() here, 136 * as the block is still in memory thanks to 137 * buffering and fsck_readdir(). If there was 138 * an error reading it before, then all decisions 139 * leading to getting us here were based on the 140 * resulting zeros. As such, we have nothing 141 * to worry about at this point. 142 */ 143 bp = getdirblk(idesc->id_blkno, blksiz); 144 (void) memmove((void *)(bp->b_un.b_buf + 145 idesc->id_loc - dsize), 146 (void *)u.dbuf, (size_t)dsize); 147 dirty(bp); 148 sbdirty(); 149 } 150 if (n & STOP) 151 return (n); 152 } 153 return (idesc->id_filesize > 0 ? KEEPON : STOP); 154 } 155 156 /* 157 * Get current entry in a directory (and peek at the next entry). 158 */ 159 static struct direct * 160 fsck_readdir(struct inodesc *idesc) 161 { 162 struct direct *dp, *ndp = 0; 163 struct bufarea *bp; 164 ushort_t size; /* of directory entry */ 165 size_t blksiz; 166 int dofixret; 167 int salvaged; /* when to report SALVAGED in preen mode */ 168 int origloc = idesc->id_loc; 169 170 blksiz = idesc->id_numfrags * sblock.fs_fsize; 171 /* 172 * Sanity check id_filesize and id_loc fields. The latter 173 * has to be within the block we're looking at, as well as 174 * aligned to a four-byte boundary. The alignment is due to 175 * a struct direct containing four-byte integers. It's 176 * unfortunate that the four is a magic number, but there's 177 * really no good way to derive it from the ufs header files. 178 */ 179 if ((idesc->id_filesize <= 0) || (idesc->id_loc >= blksiz) || 180 ((idesc->id_loc & 3) != 0)) 181 return (NULL); 182 /* 183 * We don't have to worry about holes in the directory's 184 * block list, because that was checked for when the 185 * inode was first encountered during pass1. We never 186 * scan a directory until after we've vetted its block list. 187 */ 188 /* 189 * We can ignore errors from getdirblk() here, as dircheck() 190 * will reject any entries that would have been in the bad 191 * sectors (fsck_bread() fills in zeros on failures). The main 192 * reject keys are that d_reclen would be zero and/or that it 193 * is less than the minimal size of a directory entry. Since 194 * entries can't span sectors, there's no worry about having 195 * a good beginning in one sector and the rest in the next, 196 * where that second sector was unreadable and therefore 197 * replaced with zeros. 198 */ 199 bp = getdirblk(idesc->id_blkno, blksiz); 200 /* LINTED b_buf is aligned and id_loc was verified above */ 201 dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); 202 203 /* 204 * Check the current entry in the directory. 205 */ 206 if (dircheck(idesc, dp) == 0) { 207 /* 208 * If we are in here, then either the current directory 209 * entry is bad or the next directory entry is bad. 210 */ 211 next_is_bad: 212 /* 213 * Find the amount of space left to the end of the 214 * directory block for either directory entry. 215 */ 216 size = DIRBLKSIZ - (idesc->id_loc & (DIRBLKSIZ - 1)); 217 218 /* 219 * Advance to the end of the directory block. 220 */ 221 idesc->id_loc += size; 222 idesc->id_filesize -= (offset_t)size; 223 224 /* 225 * Ask the question before we fix the in-core directory 226 * block because dofix() may reuse the buffer. 227 */ 228 salvaged = (idesc->id_fix == DONTKNOW); 229 dofixret = dofix(idesc, "DIRECTORY CORRUPTED"); 230 231 /* 232 * If there was an error reading the block, then that 233 * same error can reasonably be expected to have occurred 234 * when it was read previously. As such, the decision 235 * to come here was based on the results of that partially- 236 * zerod block, and so anything we change should be 237 * based on it as well. Upshot: no need to check for 238 * errors here. 239 */ 240 bp = getdirblk(idesc->id_blkno, blksiz); 241 /* LINTED b_buf is aligned and id_loc/origloc was verified */ 242 dp = (struct direct *)(bp->b_un.b_buf + origloc); 243 244 /* 245 * This is the current directory entry and since it is 246 * corrupt we cannot trust the rest of the directory 247 * block so change the current directory entry to 248 * contain nothing and encompass the rest of the block. 249 */ 250 if (ndp == NULL) { 251 dp->d_reclen = size; 252 dp->d_ino = 0; 253 dp->d_namlen = 0; 254 dp->d_name[0] = '\0'; 255 } 256 /* 257 * This is the next directory entry, i.e., we got here 258 * via a "goto next_is_bad". That directory entry is 259 * corrupt. However, the current directory entry is okay 260 * so if we are in fix mode, just extend its record size 261 * to encompass the rest of the block. 262 */ 263 else if (dofixret) { 264 dp->d_reclen += size; 265 } 266 /* 267 * If the user said to fix the directory corruption, then 268 * mark the block as dirty. Otherwise, our "repairs" only 269 * apply to the in-core copy so we don't hand back trash 270 * to the caller. 271 * 272 * Note: It is possible that saying "no" to a change in 273 * one part of the I/O buffer and "yes" to a later change 274 * in the same I/O buffer may still flush the change to 275 * which we said "no". This is the pathological case and 276 * no fix is planned at this time. 277 */ 278 if (dofixret) { 279 dirty(bp); 280 if (preen && salvaged) 281 (void) printf(" (SALVAGED)\n"); 282 if (idesc->id_number == lfdir) 283 lfdir = 0; 284 } 285 286 /* 287 * dp points into bp, which will get re-used at some 288 * arbitrary time in the future. We rely on the fact 289 * that we're singled-threaded, and that we'll be done 290 * with this directory entry by the time the next one 291 * is needed. 292 */ 293 return (dp); 294 } 295 /* 296 * The current directory entry checked out so advance past it. 297 */ 298 idesc->id_loc += dp->d_reclen; 299 idesc->id_filesize -= (offset_t)dp->d_reclen; 300 /* 301 * If we are not at the directory block boundary, then peek 302 * at the next directory entry and if it is bad we can add 303 * its space to the current directory entry (compression). 304 * Again, we sanity check the id_loc and id_filesize fields 305 * since we modified them above. 306 */ 307 if ((idesc->id_loc & (DIRBLKSIZ - 1)) && /* not at start */ 308 (idesc->id_loc < blksiz) && /* within block */ 309 ((idesc->id_loc & 3) == 0) && /* properly aligned */ 310 (idesc->id_filesize > 0)) { /* data follows */ 311 /* LINTED b_buf is aligned and id_loc verified to be ok */ 312 ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); 313 if (dircheck(idesc, ndp) == 0) 314 goto next_is_bad; 315 } 316 317 /* 318 * See comment above about dp pointing into bp. 319 */ 320 return (dp); 321 } 322 323 /* 324 * Verify that a directory entry is valid. 325 * This is a superset of the checks made in the kernel. 326 */ 327 static int 328 dircheck(struct inodesc *idesc, struct direct *dp) 329 { 330 size_t size; 331 char *cp; 332 int spaceleft; 333 334 /* 335 * Recall that id_filesize is the number of bytes left to 336 * process in the directory. We check id_filesize >= size 337 * instead of id_filesize >= d_reclen because all that the 338 * directory is actually required to contain is the entry 339 * itself (and it's how the kernel does the allocation). 340 * 341 * We indirectly check for d_reclen going past the end of 342 * the allocated space by comparing it against spaceleft. 343 */ 344 size = DIRSIZ(dp); 345 spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); 346 if (dp->d_ino < maxino && 347 dp->d_reclen != 0 && 348 (int)dp->d_reclen <= spaceleft && 349 (dp->d_reclen & 0x3) == 0 && 350 (int)dp->d_reclen >= size && 351 idesc->id_filesize >= (offset_t)size && 352 dp->d_namlen <= MAXNAMLEN) { 353 if (dp->d_ino == 0) 354 return (1); 355 for (cp = dp->d_name, size = 0; size < (size_t)dp->d_namlen; 356 size++, cp++) 357 if ((*cp == '\0') || (*cp == '/')) 358 goto bad; 359 if (*cp == '\0') 360 return (1); 361 } 362 bad: 363 if (debug) { 364 (void) printf("Bad dir in inode %d at lbn %d, loc %d:\n", 365 idesc->id_number, idesc->id_lbn, idesc->id_loc); 366 (void) printf(" ino %d reclen %d namlen %d name `%s'\n", 367 dp->d_ino, dp->d_reclen, dp->d_namlen, dp->d_name); 368 } 369 return (0); 370 } 371 372 void 373 adjust(struct inodesc *idesc, int lcnt) 374 { 375 struct dinode *dp; 376 caddr_t flow; 377 int saveiscorrupt; 378 struct inodesc lcidesc; 379 380 dp = ginode(idesc->id_number); 381 if (dp->di_nlink == lcnt) { 382 /* 383 * If we have not hit any unresolved problems, are running 384 * in preen mode, and are on a file system using logging, 385 * then just toss any partially allocated files, as they are 386 * an expected occurrence. 387 */ 388 if (!iscorrupt && preen && islog) { 389 clri(idesc, "UNREF", CLRI_VERBOSE, CLRI_NOP_OK); 390 return; 391 } else { 392 /* 393 * The file system can be considered clean even if 394 * a file is not linked up, but is cleared. In 395 * other words, the kernel won't panic over it. 396 * Hence, iscorrupt should not be set when 397 * linkup is answered no, but clri is answered yes. 398 * 399 * If neither is answered yes, then we have a 400 * non-panic-inducing known corruption that the 401 * user needs to be reminded of when we exit. 402 */ 403 saveiscorrupt = iscorrupt; 404 if (linkup(idesc->id_number, (fsck_ino_t)0, 405 NULL) == 0) { 406 iscorrupt = saveiscorrupt; 407 clri(idesc, "UNREF", CLRI_QUIET, CLRI_NOP_OK); 408 if (statemap[idesc->id_number] != USTATE) 409 iscorrupt = 1; 410 return; 411 } 412 dp = ginode(idesc->id_number); 413 } 414 lcnt = lncntp[idesc->id_number]; 415 } 416 417 /* 418 * It doesn't happen often, but it's possible to get a true 419 * excess of links (especially if a lot of directories got 420 * orphaned and reattached to lost+found). Instead of wrapping 421 * around, do something semi-useful (i.e., give progress towards 422 * a less-broken filesystem) when this happens. 423 */ 424 LINK_RANGE(flow, dp->di_nlink, -lcnt); 425 if (flow != NULL) { 426 LINK_CLEAR(flow, idesc->id_number, dp->di_mode, &lcidesc); 427 if (statemap[idesc->id_number] == USTATE) 428 return; 429 } 430 431 dp = ginode(idesc->id_number); 432 if (lcnt && dp->di_nlink != lcnt) { 433 pwarn("LINK COUNT %s", 434 file_id(idesc->id_number, dp->di_mode)); 435 pinode(idesc->id_number); 436 dp = ginode(idesc->id_number); 437 (void) printf(" COUNT %d SHOULD BE %d", 438 dp->di_nlink, dp->di_nlink - lcnt); 439 /* 440 * Even lost+found is subject to this, as whenever 441 * we modify it, we update both the in-memory and 442 * on-disk counts. Thus, they should still be in 443 * sync. 444 */ 445 if (preen) { 446 if (lcnt < 0) { 447 (void) printf("\n"); 448 if ((dp->di_mode & IFMT) == IFSHAD) 449 pwarn("LINK COUNT INCREASING"); 450 else 451 pfatal("LINK COUNT INCREASING"); 452 } 453 } 454 if (preen || reply("ADJUST") == 1) { 455 dp->di_nlink -= lcnt; 456 inodirty(); 457 if (preen) 458 (void) printf(" (ADJUSTED)\n"); 459 } else if (((dp->di_mode & IFMT) == IFDIR) || 460 ((dp->di_mode & IFMT) == IFATTRDIR)) { 461 /* 462 * File counts can be off relatively harmlessly, 463 * but a bad directory count can cause the 464 * kernel to lose its mind. 465 */ 466 iscorrupt = 1; 467 } 468 } 469 } 470 471 static int 472 mkentry(struct inodesc *idesc) 473 { 474 struct direct *dirp = idesc->id_dirp; 475 struct direct newent; 476 int newlen, oldlen; 477 478 newent.d_namlen = strlen(idesc->id_name); 479 newlen = DIRSIZ(&newent); 480 if (dirp->d_ino != 0) 481 oldlen = DIRSIZ(dirp); 482 else 483 oldlen = 0; 484 if ((int)dirp->d_reclen - oldlen < newlen) 485 return (KEEPON); 486 newent.d_reclen = dirp->d_reclen - (ushort_t)oldlen; 487 dirp->d_reclen = (ushort_t)oldlen; 488 489 /* LINTED dirp is aligned and DIRSIZ() forces oldlen to be aligned */ 490 dirp = (struct direct *)(((char *)dirp) + oldlen); 491 dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ 492 dirp->d_reclen = newent.d_reclen; 493 dirp->d_namlen = newent.d_namlen; 494 (void) memmove(dirp->d_name, idesc->id_name, 495 (size_t)newent.d_namlen + 1); 496 497 return (ALTERED|STOP); 498 } 499 500 static int 501 chgino(struct inodesc *idesc) 502 { 503 struct direct *dirp = idesc->id_dirp; 504 505 if (memcmp(dirp->d_name, idesc->id_name, 506 (size_t)dirp->d_namlen + 1) != 0) 507 return (KEEPON); 508 dirp->d_ino = idesc->id_parent; 509 return (ALTERED|STOP); 510 } 511 512 int 513 linkup(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name) 514 { 515 int rval; 516 struct dinode *dp; 517 int lostdir; 518 int lostshadow; 519 fsck_ino_t oldlfdir; 520 fsck_ino_t *intree; 521 struct inodesc idesc; 522 523 init_inodesc(&idesc); 524 dp = ginode(orphan); 525 lostdir = (((dp->di_mode & IFMT) == IFDIR) || 526 ((dp->di_mode & IFMT) == IFATTRDIR)); 527 if (debug && lostdir && dp->di_nlink <= 0 && lncntp[orphan] == -1) 528 (void) printf( 529 "old fsck would have left inode %d for reclaim thread\n", 530 orphan); 531 lostshadow = (dp->di_mode & IFMT) == IFSHAD; 532 pwarn("UNREF %s ", file_id(orphan, dp->di_mode)); 533 pinode(orphan); 534 if (lostshadow || (dp->di_size == 0 && dp->di_oeftflag == 0)) 535 return (0); 536 if (!preen && (reply("RECONNECT") == 0)) 537 goto noconnect; 538 539 if (lfdir == 0) { 540 dp = ginode(UFSROOTINO); 541 idesc.id_name = lfname; 542 idesc.id_type = DATA; 543 idesc.id_func = findino; 544 idesc.id_number = UFSROOTINO; 545 idesc.id_fix = DONTKNOW; 546 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) != 0) { 547 lfdir = idesc.id_parent; 548 } else { 549 pwarn("NO %s DIRECTORY", lfname); 550 if (preen || reply("CREATE") == 1) { 551 lfdir = newdir(UFSROOTINO, (fsck_ino_t)0, 552 lfmode, lfname); 553 if (lfdir != 0) { 554 if (preen) 555 (void) printf(" (CREATED)\n"); 556 else 557 (void) printf("\n"); 558 statemap[lfdir] |= INFOUND; 559 /* 560 * XXX What if we allocate an inode 561 * that's already been scanned? Then 562 * we need to leave lnctnp[] alone. 563 */ 564 TRACK_LNCNTP(UFSROOTINO, 565 lncntp[UFSROOTINO]++); 566 } 567 } 568 } 569 if (lfdir == 0) { 570 pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n", lfname); 571 pwarn("Could not reconnect inode %d\n", orphan); 572 goto noconnect; 573 } else { 574 /* 575 * We searched for it via the namespace, so by 576 * definition it's been found. We have to do this 577 * because it is possible that we're called before 578 * the full namespace mapping is complete (especially 579 * from pass 1, if it encounters a corrupt directory 580 * that has to be cleared). 581 */ 582 statemap[lfdir] |= INFOUND; 583 } 584 } 585 dp = ginode(lfdir); 586 if ((dp->di_mode & IFMT) != IFDIR) { 587 pfatal("%s IS NOT A DIRECTORY", lfname); 588 if (reply("REALLOCATE") == 0) { 589 iscorrupt = 1; 590 goto noconnect; 591 } 592 oldlfdir = lfdir; 593 lfdir = reallocdir(UFSROOTINO, (fsck_ino_t)0, lfmode, lfname); 594 if (lfdir == 0) { 595 iscorrupt = 1; 596 pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n\n", 597 lfname); 598 goto noconnect; 599 } 600 inodirty(); 601 statemap[lfdir] |= INFOUND; 602 freeino(oldlfdir, TI_PARENT); 603 } 604 if (statemap[lfdir] != DFOUND) { 605 /* 606 * Not a consistency problem of the sort that'll 607 * cause the kernel heartburn, so don't set iscorrupt. 608 */ 609 if (debug) 610 (void) printf("lfdir %d is in state 0x%x\n", 611 lfdir, (int)statemap[lfdir]); 612 lfdir = 0; 613 pfatal("SORRY. %s DIRECTORY DISAPPEARED\n\n", lfname); 614 pwarn("Could not reconnect inode %d\n", orphan); 615 goto noconnect; 616 } 617 618 rval = do_reconnect(orphan, parentdir, name); 619 620 return (rval); 621 622 /* 623 * Leaving things unconnected is harmless as far as trying to 624 * use the filesystem later, so don't set iscorrupt yet (it's 625 * just lost blocks and inodes, after all). 626 * 627 * Lost directories get noted for reporting after all checks 628 * are done - they may get cleared later. 629 */ 630 noconnect: 631 if (lostdir) { 632 intree = tsearch((void *)orphan, &limbo_dirs, 633 ino_t_cmp); 634 if (intree == NULL) 635 errexit("linkup: out of memory"); 636 } 637 return (0); 638 } 639 640 /* 641 * Connect an orphaned inode to lost+found. 642 * 643 * Returns non-zero for success, zero for failure. 644 */ 645 static int 646 do_reconnect(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name) 647 { 648 caddr_t flow_msg; 649 struct dinode *dp; 650 int lostdir; 651 mode_t mode; 652 fsck_ino_t *intree; 653 struct inodesc idesc; 654 655 dp = ginode(orphan); 656 mode = dp->di_mode & IFMT; 657 lostdir = (mode == IFDIR) || (mode == IFATTRDIR); 658 659 name = mkuniqname(name, lfname, lfdir, orphan); 660 if (name == NULL) 661 goto noconnect; 662 if (makeentry(lfdir, orphan, name) == 0) { 663 pfatal("SORRY. NO SPACE IN %s DIRECTORY\n", lfname); 664 pwarn("Could not reconnect inode %d\n", orphan); 665 goto noconnect; 666 } 667 668 dp = ginode(orphan); 669 LINK_RANGE(flow_msg, lncntp[orphan], -1); 670 if (flow_msg != NULL) { 671 LINK_CLEAR(flow_msg, orphan, dp->di_mode, &idesc); 672 if (statemap[orphan] == USTATE) 673 goto noconnect; 674 } 675 TRACK_LNCNTP(orphan, lncntp[orphan]--); 676 677 /* 678 * Make sure that anything we put into the normal namespace 679 * looks like it belongs there. Attributes can only be in 680 * attribute directories, not the normal directory lost+found. 681 */ 682 maybe_convert_attrdir_to_dir(orphan); 683 684 if (lostdir) { 685 /* 686 * Can't be creating a duplicate entry with makeentry(), 687 * because changeino() will succeed if ".." already 688 * exists. 689 */ 690 if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && 691 parentdir != (fsck_ino_t)-1) 692 (void) makeentry(orphan, lfdir, ".."); 693 /* 694 * If we were half-detached, don't try to get 695 * inode 0 later on. 696 */ 697 if (parentdir == 0) 698 parentdir = -1; 699 /* 700 * Fix up link counts. 701 * 702 * XXX This section is getting pretty byzantine, espcially 703 * when combined with changeino()/chgino()'s link manipulation. 704 */ 705 LFDIR_LINK_RANGE_RVAL(flow_msg, lncntp[lfdir], 1, &idesc, 0); 706 TRACK_LNCNTP(lfdir, lncntp[lfdir]--); 707 pwarn("DIR I=%lu CONNECTED. ", (long)orphan); 708 reattached_dir = 1; 709 if (parentdir != (fsck_ino_t)-1) { 710 /* 711 * Have to clear the parent's reference. Otherwise, 712 * if it's an orphan, then we may clear this orphan 713 * in pass 4 even though we've reconnected it. 714 * 715 * We already have the reference count 716 * allowing for a parent link, so undo the 717 * adjustment done above. Otherwise we come 718 * out high by one. 719 */ 720 (void) printf("PARENT WAS I=%lu\n", (long)parentdir); 721 (void) cleardirentry(parentdir, orphan); 722 } 723 if (!preen) 724 (void) printf("\n"); 725 } else if (preen) { 726 (void) printf(" (RECONNECTED)\n"); 727 } 728 729 statemap[orphan] &= ~INDELAYD; 730 return (1); 731 732 /* 733 * Leaving things unconnected is harmless as far as trying to 734 * use the filesystem later, so don't set iscorrupt yet (it's 735 * just lost blocks and inodes, after all). 736 * 737 * Lost directories get noted for reporting after all checks 738 * are done - they may get cleared later. 739 */ 740 noconnect: 741 if (lostdir) { 742 intree = tsearch((void *)orphan, &limbo_dirs, 743 ino_t_cmp); 744 if (intree == NULL) 745 errexit("linkup: out of memory"); 746 } 747 return (0); 748 } 749 750 /* 751 * fix an entry in a directory. 752 */ 753 int 754 changeino(fsck_ino_t dir, char *name, fsck_ino_t newnum) 755 { 756 struct inodesc idesc; 757 758 init_inodesc(&idesc); 759 idesc.id_type = DATA; 760 idesc.id_func = chgino; 761 idesc.id_number = dir; 762 idesc.id_fix = DONTKNOW; 763 idesc.id_name = name; 764 idesc.id_parent = newnum; /* new value for name */ 765 return (ckinode(ginode(dir), &idesc, CKI_TRAVERSE)); 766 } 767 768 /* 769 * make an entry in a directory 770 */ 771 int 772 makeentry(fsck_ino_t parent, fsck_ino_t ino, char *name) 773 { 774 int repeat; 775 struct dinode *dp; 776 struct inoinfo *iip; 777 struct inodesc idesc; 778 char pathbuf[MAXPATHLEN + 1]; 779 780 if (parent < UFSROOTINO || parent >= maxino || 781 ino < UFSROOTINO || ino >= maxino) 782 return (0); 783 init_inodesc(&idesc); 784 idesc.id_type = DATA; 785 idesc.id_func = mkentry; 786 idesc.id_number = parent; 787 idesc.id_parent = ino; /* this is the inode to enter */ 788 idesc.id_fix = DONTKNOW; 789 idesc.id_name = name; 790 791 repeat = 0; 792 again: 793 dp = ginode(parent); 794 if ((dp->di_size % DIRBLKSIZ) != 0) { 795 dp->di_size = roundup(dp->di_size, DIRBLKSIZ); 796 inodirty(); 797 798 iip = getinoinfo(ino); 799 if (iip != NULL) 800 iip->i_isize = dp->di_size; 801 } 802 803 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & ALTERED) != 0) { 804 iip = getinoinfo(ino); 805 if (iip != NULL) 806 iip->i_isize = dp->di_size; 807 808 return (1); 809 } 810 811 if (repeat == 0) { 812 getpathname(pathbuf, parent, parent); 813 if (expanddir(parent, pathbuf) == 0) 814 return (0); 815 816 repeat = 1; 817 goto again; 818 } 819 820 return (0); 821 } 822 823 /* 824 * Attempt to expand the size of a directory 825 */ 826 static int 827 expanddir(fsck_ino_t ino, char *name) 828 { 829 struct bufarea *bpback, *bp[2]; 830 daddr32_t nxtibn, nxtbn; 831 daddr32_t newblk[2]; 832 struct dinode *dp; 833 char *cp; 834 int bc, f; 835 int n; 836 int allocIndir; 837 int frag2blks; 838 int lffragsz = 0; 839 int c = 0; 840 int retval = 0; 841 842 bp[0] = bp[1] = NULL; 843 844 dp = ginode(ino); 845 if (dp->di_size == 0) { 846 goto bail; 847 } 848 849 nxtbn = lblkno(&sblock, dp->di_size - 1) + 1; 850 851 /* 852 * Check that none of the nominally in-use direct block 853 * addresses for the directory are bogus. 854 */ 855 for (bc = 0; ((nxtbn > 0) && (bc < nxtbn) && (bc < NDADDR)); bc++) { 856 if (dp->di_db[bc] == 0) { 857 goto bail; 858 } 859 } 860 861 /* 862 * Determine our data block allocation needs. We always need to 863 * allocate at least one data block. We may need a second, the 864 * indirect block itself. 865 */ 866 allocIndir = 0; 867 nxtibn = -1; 868 n = 0; 869 870 if (nxtbn <= NDADDR) { 871 /* 872 * Still in direct blocks. Check for the unlikely 873 * case where the last block is a frag rather than 874 * a full block. This would only happen if someone had 875 * created a file in lost+found, and then that caused 876 * the dynamic directory shrinking capabilities of ufs 877 * to kick in. 878 * 879 * Note that we test nxtbn <= NDADDR, as it's the 880 * next block (i.e., one greater than the current/ 881 * actual block being examined). 882 */ 883 lffragsz = dp->di_size % sblock.fs_bsize; 884 } 885 886 if (nxtbn >= NDADDR && !lffragsz) { 887 n = sblock.fs_bsize / sizeof (daddr32_t); 888 nxtibn = nxtbn - NDADDR; 889 /* 890 * Only go one level of indirection 891 */ 892 if (nxtibn >= n) { 893 goto bail; 894 } 895 /* 896 * First indirect block means we need to pick up 897 * the actual indirect pointer block as well. 898 */ 899 if (nxtibn == 0) 900 allocIndir++; 901 } 902 903 /* 904 * Allocate all the new blocks we need. 905 */ 906 if ((newblk[0] = allocblk(sblock.fs_frag)) == 0) { 907 goto bail; 908 } 909 c++; 910 if (allocIndir) { 911 if ((newblk[1] = allocblk(sblock.fs_frag)) == 0) { 912 goto bail; 913 } 914 c++; 915 } 916 917 /* 918 * Take care of the block that will hold new directory entries. 919 * This one is always allocated. 920 */ 921 bp[0] = getdirblk(newblk[0], (size_t)sblock.fs_bsize); 922 if (bp[0]->b_errs) { 923 goto bail; 924 } 925 926 if (lffragsz) { 927 /* 928 * Preserve the partially-populated existing directory. 929 */ 930 bpback = getdirblk(dp->di_db[nxtbn - 1], 931 (size_t)dblksize(&sblock, dp, nxtbn - 1)); 932 if (!bpback->b_errs) { 933 (void) memmove(bp[0]->b_un.b_buf, bpback->b_un.b_buf, 934 (size_t)lffragsz); 935 } 936 } 937 938 /* 939 * Initialize the new fragments. lffragsz is zero if this 940 * is a completely-new block. 941 */ 942 for (cp = &(bp[0]->b_un.b_buf[lffragsz]); 943 cp < &(bp[0]->b_un.b_buf[sblock.fs_bsize]); 944 cp += DIRBLKSIZ) { 945 (void) memmove(cp, (char *)&emptydir, 946 sizeof (emptydir)); 947 } 948 dirty(bp[0]); 949 950 /* 951 * If we allocated the indirect block, zero it out. Otherwise 952 * read it in if we're using one. 953 */ 954 if (allocIndir) { 955 bp[1] = getdatablk(newblk[1], (size_t)sblock.fs_bsize); 956 if (bp[1]->b_errs) { 957 goto bail; 958 } 959 (void) memset(bp[1]->b_un.b_buf, 0, sblock.fs_bsize); 960 dirty(bp[1]); 961 } else if (nxtibn >= 0) { 962 /* Check that the indirect block pointer looks okay */ 963 if (dp->di_ib[0] == 0) { 964 goto bail; 965 } 966 bp[1] = getdatablk(dp->di_ib[0], (size_t)sblock.fs_bsize); 967 if (bp[1]->b_errs) { 968 goto bail; 969 } 970 971 for (bc = 0; ((bc < nxtibn) && (bc < n)); bc++) { 972 /* LINTED pointer cast alignment */ 973 if (((daddr32_t *)bp[1]->b_un.b_buf)[bc] == 0) { 974 goto bail; 975 } 976 } 977 } 978 979 /* 980 * Since the filesystem's consistency isn't affected by 981 * whether or not we actually do the expansion, iscorrupt 982 * is left alone for any of the approval paths. 983 */ 984 pwarn("NO SPACE LEFT IN %s", name); 985 if (!preen && (reply("EXPAND") == 0)) 986 goto bail; 987 988 /* 989 * Now that everything we need is gathered up and the 990 * necessary approvals acquired, we can make our provisional 991 * changes permanent. 992 */ 993 994 if (lffragsz) { 995 /* 996 * We've saved the data from the old end fragment(s) in 997 * our new block, so we can just swap the new one in. 998 * Make sure the size reflects the expansion of the 999 * final fragments/block. 1000 */ 1001 frag2blks = roundup(lffragsz, sblock.fs_fsize); 1002 freeblk(ino, dp->di_db[nxtbn - 1], 1003 frag2blks / sblock.fs_fsize); 1004 frag2blks = btodb(frag2blks); 1005 dp->di_size -= (u_offset_t)lffragsz; 1006 dp->di_blocks = dp->di_blocks - frag2blks; 1007 dp->di_db[nxtbn - 1] = newblk[0]; 1008 dp->di_size += (u_offset_t)sblock.fs_bsize; 1009 dp->di_blocks += btodb(sblock.fs_bsize); 1010 inodirty(); 1011 retval = 1; 1012 goto done; 1013 } 1014 1015 /* 1016 * Full-block addition's much easier. It's just an append. 1017 */ 1018 dp->di_size += (u_offset_t)sblock.fs_bsize; 1019 dp->di_blocks += btodb(sblock.fs_bsize); 1020 if (allocIndir) { 1021 dp->di_blocks += btodb(sblock.fs_bsize); 1022 } 1023 1024 inodirty(); 1025 if (nxtibn < 0) { 1026 /* 1027 * Still in direct blocks 1028 */ 1029 dp->di_db[nxtbn] = newblk[0]; 1030 } else { 1031 /* 1032 * Last indirect is always going to point at the 1033 * new directory buffer 1034 */ 1035 if (allocIndir) 1036 dp->di_ib[0] = newblk[1]; 1037 /* LINTED pointer case alignment */ 1038 ((daddr32_t *)bp[1]->b_un.b_buf)[nxtibn] = newblk[0]; 1039 dirty(bp[1]); 1040 } 1041 1042 if (preen) 1043 (void) printf(" (EXPANDED)\n"); 1044 1045 retval = 1; 1046 goto done; 1047 1048 bail: 1049 for (f = 0; f < c; f++) 1050 freeblk(ino, newblk[f], sblock.fs_frag); 1051 done: 1052 /* 1053 * bp[0] is handled by the directory cache's auto-release. 1054 */ 1055 if (bp[1] != NULL) 1056 brelse(bp[1]); 1057 1058 return (retval); 1059 } 1060 1061 static fsck_ino_t 1062 newdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name) 1063 { 1064 fsck_ino_t dino; 1065 char pname[BUFSIZ]; 1066 1067 /* 1068 * This function creates a new directory and populates it with 1069 * "." and "..", then links to it as NAME in PARENT. 1070 */ 1071 dino = allocdir(parent, request, mode, 1); 1072 if (dino != 0) { 1073 getpathname(pname, parent, parent); 1074 name = mkuniqname(name, pname, parent, dino); 1075 /* 1076 * We don't touch numdirs, because it's just a cache of 1077 * what the filesystem claimed originally and is used 1078 * to calculate hash keys. 1079 */ 1080 if (makeentry(parent, dino, name) == 0) { 1081 freedir(dino, parent); 1082 dino = 0; 1083 } 1084 } 1085 1086 return (dino); 1087 } 1088 1089 /* 1090 * Replace whatever NAME refers to in PARENT with a new directory. 1091 * Note that if the old inode REQUEST is a directory, all of its 1092 * contents will be freed and reaped. 1093 */ 1094 static fsck_ino_t 1095 reallocdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name) 1096 { 1097 int retval; 1098 fsck_ino_t newino; 1099 1100 if ((request != 0) && (statemap[request] != USTATE)) 1101 freeino(request, TI_PARENT); 1102 1103 newino = allocdir(parent, request, mode, 0); 1104 if (newino != 0) { 1105 retval = changeino(parent, name, newino); 1106 if ((retval & ALTERED) == 0) { 1107 /* 1108 * No change made, so name doesn't exist, so 1109 * unwind allocation rather than leak it. 1110 */ 1111 freedir(newino, parent); 1112 newino = 0; 1113 } 1114 } 1115 1116 return (newino); 1117 } 1118 1119 /* 1120 * allocate a new directory 1121 */ 1122 fsck_ino_t 1123 allocdir(fsck_ino_t parent, fsck_ino_t request, int mode, int update_parent) 1124 { 1125 fsck_ino_t ino; 1126 caddr_t cp; 1127 caddr_t flow; 1128 struct dinode *dp; 1129 struct bufarea *bp; 1130 struct inoinfo *inp; 1131 struct inodesc idesc; 1132 struct dirtemplate *dirp; 1133 1134 ino = allocino(request, IFDIR|mode); 1135 if (ino == 0) 1136 return (0); 1137 dirp = &dirhead; 1138 dirp->dot_ino = ino; 1139 dirp->dotdot_ino = parent; 1140 dp = ginode(ino); 1141 bp = getdirblk(dp->di_db[0], (size_t)sblock.fs_fsize); 1142 if (bp->b_errs) { 1143 freeino(ino, TI_PARENT); 1144 return (0); 1145 } 1146 (void) memmove(bp->b_un.b_buf, (void *)dirp, 1147 sizeof (struct dirtemplate)); 1148 for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; 1149 cp < &bp->b_un.b_buf[sblock.fs_fsize]; 1150 cp += DIRBLKSIZ) 1151 (void) memmove(cp, (void *)&emptydir, sizeof (emptydir)); 1152 dirty(bp); 1153 dp->di_nlink = 2; 1154 inodirty(); 1155 if (!inocached(ino)) { 1156 cacheino(dp, ino); 1157 } else { 1158 /* 1159 * re-using an old directory inode 1160 */ 1161 inp = getinoinfo(ino); 1162 if (inp == NULL) { 1163 if (debug) 1164 errexit("allocdir got NULL from getinoinfo " 1165 "for existing entry I=%d\n", 1166 ino); 1167 cacheino(dp, ino); 1168 } else { 1169 init_inoinfo(inp, dp, ino); 1170 inp->i_parent = parent; 1171 inp->i_dotdot = parent; 1172 } 1173 } 1174 1175 /* 1176 * Short-circuit all the dancing around below if it's the 1177 * root inode. The net effect's the same. 1178 */ 1179 if (ino == UFSROOTINO) { 1180 TRACK_LNCNTP(ino, lncntp[ino] = dp->di_nlink); 1181 return (ino); 1182 } 1183 1184 if (!update_parent) 1185 return (ino); 1186 1187 /* 1188 * We never create attribute directories, which can have 1189 * non-directory parents. So, the parent of the directory 1190 * we're creating must itself be a directory. 1191 */ 1192 if (!INO_IS_DVALID(parent)) { 1193 freeino(ino, TI_PARENT); 1194 return (0); 1195 } 1196 1197 /* 1198 * Make sure the parent can handle another link. 1199 * Since we might only update one version of the 1200 * count (disk versus in-memory), we have to check both. 1201 */ 1202 LINK_RANGE(flow, lncntp[parent], -1); 1203 if (flow == NULL) 1204 LINK_RANGE(flow, (int)dp->di_nlink, 1); 1205 1206 if (flow != NULL) { 1207 LINK_CLEAR(flow, parent, dp->di_mode, &idesc); 1208 if (statemap[parent] == USTATE) { 1209 /* 1210 * No parent any more, so bail out. Callers 1211 * are expected to handle this possibility. 1212 * Since most just throw up their hands if 1213 * we return 0, this just happens to work. 1214 */ 1215 freeino(ino, TI_PARENT); 1216 return (0); 1217 } 1218 } 1219 1220 /* 1221 * We've created a directory with two entries, "." and "..", 1222 * and a link count of two ("." and one from its parent). If 1223 * the parent's not been scanned yet, which means this inode 1224 * will get scanned later as well, then make our in-core count 1225 * match what we pushed out to disk. Similarly, update the 1226 * parent. On the other hand, if the parent's already been 1227 * looked at (statemap[ino] == DFOUND), the discrepancy 1228 * between lncntp[] and di_nlink will be noted later, with 1229 * appropriate reporting and propagation, in pass2b. 1230 * 1231 * We're explicitly skipping where the parent was DZLINK or 1232 * DFOUND. If it has zero links, it can't be gotten to, so 1233 * we want a discrepancy set up that will be caught in pass2b. 1234 * DFOUND was discussed above. 1235 * 1236 * Regarding the claim of a link from the parent: we've not 1237 * done anything to create such a link here. We depend on the 1238 * semantics of our callers attaching the inode we return to 1239 * an existing entry in the directory or creating the entry 1240 * themselves, but in either case, not modifying the link 1241 * count. 1242 * 1243 * Note that setting lncntp[ino] to zero means that both claimed 1244 * links have been ``found''. 1245 */ 1246 statemap[ino] = statemap[parent]; 1247 if (INO_IS_DVALID(parent)) { 1248 TRACK_LNCNTP(ino, lncntp[ino] = 0); 1249 TRACK_LNCNTP(parent, lncntp[parent]--); 1250 } 1251 dp = ginode(parent); 1252 dp->di_nlink++; 1253 inodirty(); 1254 return (ino); 1255 } 1256 1257 /* 1258 * free a directory inode 1259 */ 1260 static void 1261 freedir(fsck_ino_t ino, fsck_ino_t parent) 1262 { 1263 struct inoinfo *iip; 1264 1265 if (ino != parent) { 1266 /* 1267 * Make sure that the desired parent gets a link 1268 * count update from freeino()/truncino(). If 1269 * we can't look it up, then it's not really a 1270 * directory, so there's nothing to worry about. 1271 */ 1272 iip = getinoinfo(ino); 1273 if (iip != NULL) 1274 iip->i_parent = parent; 1275 } 1276 freeino(ino, TI_PARENT); 1277 } 1278 1279 /* 1280 * generate a temporary name for use in the lost+found directory. 1281 */ 1282 static void 1283 lftempname(char *bufp, fsck_ino_t ino) 1284 { 1285 fsck_ino_t in; 1286 caddr_t cp; 1287 int namlen; 1288 1289 cp = bufp + 2; 1290 for (in = maxino; in > 0; in /= 10) 1291 cp++; 1292 *--cp = '\0'; 1293 /* LINTED difference will not overflow an int */ 1294 namlen = cp - bufp; 1295 if ((namlen > BUFSIZ) || (namlen > MAXPATHLEN)) { 1296 errexit("buffer overflow in lftempname()\n"); 1297 } 1298 1299 in = ino; 1300 while (cp > bufp) { 1301 *--cp = (in % 10) + '0'; 1302 in /= 10; 1303 } 1304 *cp = '#'; 1305 } 1306 1307 /* 1308 * Get a directory block. 1309 * Insure that it is held until another is requested. 1310 * 1311 * Our callers are expected to check for errors and/or be 1312 * prepared to handle blocks of zeros in the middle of a 1313 * directory. 1314 */ 1315 static struct bufarea * 1316 getdirblk(daddr32_t blkno, size_t size) 1317 { 1318 if (pdirbp != 0) { 1319 brelse(pdirbp); 1320 } 1321 pdirbp = getdatablk(blkno, size); 1322 return (pdirbp); 1323 } 1324 1325 /* 1326 * Create a unique name for INODE to be created in directory PARENT. 1327 * Use NAME if it is provided (non-NULL) and doesn't already exist. 1328 * Returning NULL indicates no unique name could be generated. 1329 * 1330 * If we were given a name, and it conflicts with an existing 1331 * entry, use our usual temp name instead. Without this check, 1332 * we could end up creating duplicate entries for multiple 1333 * orphaned directories in lost+found with the same name (but 1334 * different parents). Of course, our usual name might already 1335 * be in use as well, so be paranoid. 1336 * 1337 * We could do something like keep tacking something onto the 1338 * end of tempname until we come up with something that's not 1339 * in use, but that has liabilities as well. This is a 1340 * sufficiently rare case that it's not worth going that 1341 * overboard for. 1342 */ 1343 static caddr_t 1344 mkuniqname(caddr_t name, caddr_t pname, fsck_ino_t parent, fsck_ino_t inode) 1345 { 1346 fsck_ino_t oldino; 1347 struct dinode *dp; 1348 caddr_t flow_msg; 1349 struct inodesc idesc; 1350 static char tempname[BUFSIZ]; 1351 1352 lftempname(tempname, inode); 1353 if ((name != NULL) && 1354 (lookup_named_ino(parent, name) != 0)) { 1355 name = NULL; 1356 } 1357 if (name == NULL) { 1358 /* 1359 * No name given, or it wasn't unique. 1360 */ 1361 name = tempname; 1362 if ((oldino = lookup_named_ino(parent, name)) != 0) { 1363 pfatal( 1364 "Name ``%s'' for inode %d already exists in %s \n", 1365 name, oldino, pname); 1366 if (reply("REMOVE OLD ENTRY") == 0) { 1367 if (parent == lfdir) 1368 pwarn( 1369 "Could not reconnect inode %d\n\n", 1370 inode); 1371 else 1372 pwarn( 1373 "Could not create entry for %d\n\n", 1374 inode); 1375 name = NULL; 1376 goto noconnect; 1377 } 1378 (void) changeino(parent, name, inode); 1379 LINK_RANGE(flow_msg, lncntp[oldino], 1); 1380 if (flow_msg != NULL) { 1381 /* 1382 * Do a best-effort, but if we're not 1383 * allowed to do the clear, the fs is 1384 * corrupt in any case, so just carry on. 1385 */ 1386 dp = ginode(oldino); 1387 LINK_CLEAR(flow_msg, oldino, dp->di_mode, 1388 &idesc); 1389 if (statemap[oldino] != USTATE) 1390 iscorrupt = 1; 1391 } else { 1392 TRACK_LNCNTP(oldino, lncntp[oldino]++); 1393 } 1394 } 1395 } 1396 1397 noconnect: 1398 return (name); 1399 } 1400