1 /* 2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 3 * Use is subject to license terms. 4 */ 5 6 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 7 /* All Rights Reserved */ 8 9 /* 10 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California. 11 * All rights reserved. 12 * 13 * Redistribution and use in source and binary forms are permitted 14 * provided that: (1) source distributions retain this entire copyright 15 * notice and comment, and (2) distributions including binaries display 16 * the following acknowledgement: ``This product includes software 17 * developed by the University of California, Berkeley and its contributors'' 18 * in the documentation or other materials provided with the distribution 19 * and in all advertising materials mentioning features or use of this 20 * software. Neither the name of the University nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific prior written permission. 23 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 24 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 25 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 26 */ 27 28 #include <sys/param.h> 29 #include <sys/types.h> 30 #include <sys/sysmacros.h> 31 #include <sys/mntent.h> 32 #include <string.h> 33 #include <stdarg.h> 34 #include <sys/fs/ufs_fs.h> 35 #include <sys/vnode.h> 36 #include <sys/fs/ufs_inode.h> 37 #define _KERNEL 38 #include <sys/fs/ufs_fsdir.h> 39 #undef _KERNEL 40 #include "fsck.h" 41 42 struct rc_queue { 43 struct rc_queue *rc_next; 44 fsck_ino_t rc_orphan; 45 fsck_ino_t rc_parent; 46 caddr_t rc_name; 47 }; 48 49 caddr_t lfname = "lost+found"; /* name to use for l+f dir */ 50 static int lfmode = 01700; /* mode to use when creating l+f dir */ 51 static struct dirtemplate emptydir = { 0, DIRBLKSIZ }; 52 static struct dirtemplate dirhead = { 53 0, 12, 1, ".", 0, DIRBLKSIZ - 12, 2, ".." 54 }; 55 56 static void lftempname(char *, fsck_ino_t); 57 static int do_reconnect(fsck_ino_t, fsck_ino_t, caddr_t); 58 static caddr_t mkuniqname(caddr_t, caddr_t, fsck_ino_t, fsck_ino_t); 59 static int chgino(struct inodesc *); 60 static int dircheck(struct inodesc *, struct direct *); 61 static int expanddir(fsck_ino_t, char *); 62 static void freedir(fsck_ino_t, fsck_ino_t); 63 static struct direct *fsck_readdir(struct inodesc *); 64 static struct bufarea *getdirblk(daddr32_t, size_t); 65 static int mkentry(struct inodesc *); 66 static fsck_ino_t newdir(fsck_ino_t, fsck_ino_t, int, caddr_t); 67 static fsck_ino_t reallocdir(fsck_ino_t, fsck_ino_t, int, caddr_t); 68 69 /* 70 * Propagate connected state through the tree. 71 */ 72 void 73 propagate(void) 74 { 75 struct inoinfo **inpp, *inp; 76 struct inoinfo **inpend; 77 int change, inorphan; 78 79 inpend = &inpsort[inplast]; 80 do { 81 change = 0; 82 for (inpp = inpsort; inpp < inpend; inpp++) { 83 inp = *inpp; 84 if (inp->i_parent == 0) 85 continue; 86 if (statemap[inp->i_parent] == DFOUND && 87 INO_IS_DUNFOUND(inp->i_number)) { 88 inorphan = statemap[inp->i_number] & INORPHAN; 89 statemap[inp->i_number] = DFOUND | inorphan; 90 change++; 91 } 92 } 93 } while (change > 0); 94 } 95 96 /* 97 * Scan each entry in a directory block. 98 */ 99 int 100 dirscan(struct inodesc *idesc) 101 { 102 struct direct *dp; 103 struct bufarea *bp; 104 uint_t dsize, n; 105 size_t blksiz; 106 union { /* keep lint happy about alignment */ 107 char dbuf[DIRBLKSIZ]; 108 struct direct dir; 109 } u; 110 111 if (idesc->id_type != DATA) 112 errexit("wrong type to dirscan %d\n", idesc->id_type); 113 if (idesc->id_entryno == 0 && 114 (idesc->id_filesize & (DIRBLKSIZ - 1)) != 0) 115 idesc->id_filesize = roundup(idesc->id_filesize, DIRBLKSIZ); 116 blksiz = idesc->id_numfrags * sblock.fs_fsize; 117 if (chkrange(idesc->id_blkno, idesc->id_numfrags)) { 118 idesc->id_filesize -= (offset_t)blksiz; 119 return (SKIP); 120 } 121 idesc->id_loc = 0; 122 for (dp = fsck_readdir(idesc); dp != NULL; dp = fsck_readdir(idesc)) { 123 /* 124 * If we were just passed a corrupt directory entry with 125 * d_reclen > DIRBLKSIZ, we don't want to memmove() all over 126 * our stack. This directory gets cleaned up later. 127 */ 128 dsize = MIN(dp->d_reclen, sizeof (u.dbuf)); 129 (void) memmove((void *)u.dbuf, (void *)dp, (size_t)dsize); 130 idesc->id_dirp = &u.dir; 131 if ((n = (*idesc->id_func)(idesc)) & ALTERED) { 132 /* 133 * We can ignore errors from getdirblk() here, 134 * as the block is still in memory thanks to 135 * buffering and fsck_readdir(). If there was 136 * an error reading it before, then all decisions 137 * leading to getting us here were based on the 138 * resulting zeros. As such, we have nothing 139 * to worry about at this point. 140 */ 141 bp = getdirblk(idesc->id_blkno, blksiz); 142 (void) memmove((void *)(bp->b_un.b_buf + 143 idesc->id_loc - dsize), 144 (void *)u.dbuf, (size_t)dsize); 145 dirty(bp); 146 sbdirty(); 147 } 148 if (n & STOP) 149 return (n); 150 } 151 return (idesc->id_filesize > 0 ? KEEPON : STOP); 152 } 153 154 /* 155 * Get current entry in a directory (and peek at the next entry). 156 */ 157 static struct direct * 158 fsck_readdir(struct inodesc *idesc) 159 { 160 struct direct *dp, *ndp = 0; 161 struct bufarea *bp; 162 ushort_t size; /* of directory entry */ 163 size_t blksiz; 164 int dofixret; 165 int salvaged; /* when to report SALVAGED in preen mode */ 166 int origloc = idesc->id_loc; 167 168 blksiz = idesc->id_numfrags * sblock.fs_fsize; 169 /* 170 * Sanity check id_filesize and id_loc fields. The latter 171 * has to be within the block we're looking at, as well as 172 * aligned to a four-byte boundary. The alignment is due to 173 * a struct direct containing four-byte integers. It's 174 * unfortunate that the four is a magic number, but there's 175 * really no good way to derive it from the ufs header files. 176 */ 177 if ((idesc->id_filesize <= 0) || (idesc->id_loc >= blksiz) || 178 ((idesc->id_loc & 3) != 0)) 179 return (NULL); 180 /* 181 * We don't have to worry about holes in the directory's 182 * block list, because that was checked for when the 183 * inode was first encountered during pass1. We never 184 * scan a directory until after we've vetted its block list. 185 */ 186 /* 187 * We can ignore errors from getdirblk() here, as dircheck() 188 * will reject any entries that would have been in the bad 189 * sectors (fsck_bread() fills in zeros on failures). The main 190 * reject keys are that d_reclen would be zero and/or that it 191 * is less than the minimal size of a directory entry. Since 192 * entries can't span sectors, there's no worry about having 193 * a good beginning in one sector and the rest in the next, 194 * where that second sector was unreadable and therefore 195 * replaced with zeros. 196 */ 197 bp = getdirblk(idesc->id_blkno, blksiz); 198 /* LINTED b_buf is aligned and id_loc was verified above */ 199 dp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); 200 201 /* 202 * Check the current entry in the directory. 203 */ 204 if (dircheck(idesc, dp) == 0) { 205 /* 206 * If we are in here, then either the current directory 207 * entry is bad or the next directory entry is bad. 208 */ 209 next_is_bad: 210 /* 211 * Find the amount of space left to the end of the 212 * directory block for either directory entry. 213 */ 214 size = DIRBLKSIZ - (idesc->id_loc & (DIRBLKSIZ - 1)); 215 216 /* 217 * Advance to the end of the directory block. 218 */ 219 idesc->id_loc += size; 220 idesc->id_filesize -= (offset_t)size; 221 222 /* 223 * Ask the question before we fix the in-core directory 224 * block because dofix() may reuse the buffer. 225 */ 226 salvaged = (idesc->id_fix == DONTKNOW); 227 dofixret = dofix(idesc, "DIRECTORY CORRUPTED"); 228 229 /* 230 * If there was an error reading the block, then that 231 * same error can reasonably be expected to have occurred 232 * when it was read previously. As such, the decision 233 * to come here was based on the results of that partially- 234 * zerod block, and so anything we change should be 235 * based on it as well. Upshot: no need to check for 236 * errors here. 237 */ 238 bp = getdirblk(idesc->id_blkno, blksiz); 239 /* LINTED b_buf is aligned and id_loc/origloc was verified */ 240 dp = (struct direct *)(bp->b_un.b_buf + origloc); 241 242 /* 243 * This is the current directory entry and since it is 244 * corrupt we cannot trust the rest of the directory 245 * block so change the current directory entry to 246 * contain nothing and encompass the rest of the block. 247 */ 248 if (ndp == NULL) { 249 dp->d_reclen = size; 250 dp->d_ino = 0; 251 dp->d_namlen = 0; 252 dp->d_name[0] = '\0'; 253 } 254 /* 255 * This is the next directory entry, i.e., we got here 256 * via a "goto next_is_bad". That directory entry is 257 * corrupt. However, the current directory entry is okay 258 * so if we are in fix mode, just extend its record size 259 * to encompass the rest of the block. 260 */ 261 else if (dofixret) { 262 dp->d_reclen += size; 263 } 264 /* 265 * If the user said to fix the directory corruption, then 266 * mark the block as dirty. Otherwise, our "repairs" only 267 * apply to the in-core copy so we don't hand back trash 268 * to the caller. 269 * 270 * Note: It is possible that saying "no" to a change in 271 * one part of the I/O buffer and "yes" to a later change 272 * in the same I/O buffer may still flush the change to 273 * which we said "no". This is the pathological case and 274 * no fix is planned at this time. 275 */ 276 if (dofixret) { 277 dirty(bp); 278 if (preen && salvaged) 279 (void) printf(" (SALVAGED)\n"); 280 if (idesc->id_number == lfdir) 281 lfdir = 0; 282 } 283 284 /* 285 * dp points into bp, which will get re-used at some 286 * arbitrary time in the future. We rely on the fact 287 * that we're singled-threaded, and that we'll be done 288 * with this directory entry by the time the next one 289 * is needed. 290 */ 291 return (dp); 292 } 293 /* 294 * The current directory entry checked out so advance past it. 295 */ 296 idesc->id_loc += dp->d_reclen; 297 idesc->id_filesize -= (offset_t)dp->d_reclen; 298 /* 299 * If we are not at the directory block boundary, then peek 300 * at the next directory entry and if it is bad we can add 301 * its space to the current directory entry (compression). 302 * Again, we sanity check the id_loc and id_filesize fields 303 * since we modified them above. 304 */ 305 if ((idesc->id_loc & (DIRBLKSIZ - 1)) && /* not at start */ 306 (idesc->id_loc < blksiz) && /* within block */ 307 ((idesc->id_loc & 3) == 0) && /* properly aligned */ 308 (idesc->id_filesize > 0)) { /* data follows */ 309 /* LINTED b_buf is aligned and id_loc verified to be ok */ 310 ndp = (struct direct *)(bp->b_un.b_buf + idesc->id_loc); 311 if (dircheck(idesc, ndp) == 0) 312 goto next_is_bad; 313 } 314 315 /* 316 * See comment above about dp pointing into bp. 317 */ 318 return (dp); 319 } 320 321 /* 322 * Verify that a directory entry is valid. 323 * This is a superset of the checks made in the kernel. 324 */ 325 static int 326 dircheck(struct inodesc *idesc, struct direct *dp) 327 { 328 size_t size; 329 char *cp; 330 int spaceleft; 331 332 /* 333 * Recall that id_filesize is the number of bytes left to 334 * process in the directory. We check id_filesize >= size 335 * instead of id_filesize >= d_reclen because all that the 336 * directory is actually required to contain is the entry 337 * itself (and it's how the kernel does the allocation). 338 * 339 * We indirectly check for d_reclen going past the end of 340 * the allocated space by comparing it against spaceleft. 341 */ 342 size = DIRSIZ(dp); 343 spaceleft = DIRBLKSIZ - (idesc->id_loc % DIRBLKSIZ); 344 if (dp->d_ino < maxino && 345 dp->d_reclen != 0 && 346 (int)dp->d_reclen <= spaceleft && 347 (dp->d_reclen & 0x3) == 0 && 348 (int)dp->d_reclen >= size && 349 idesc->id_filesize >= (offset_t)size && 350 dp->d_namlen <= MAXNAMLEN) { 351 if (dp->d_ino == 0) 352 return (1); 353 for (cp = dp->d_name, size = 0; size < (size_t)dp->d_namlen; 354 size++, cp++) 355 if ((*cp == '\0') || (*cp == '/')) 356 goto bad; 357 if (*cp == '\0') 358 return (1); 359 } 360 bad: 361 if (debug) { 362 (void) printf("Bad dir in inode %d at lbn %d, loc %d:\n", 363 idesc->id_number, idesc->id_lbn, idesc->id_loc); 364 (void) printf(" ino %d reclen %d namlen %d name `%s'\n", 365 dp->d_ino, dp->d_reclen, dp->d_namlen, dp->d_name); 366 } 367 return (0); 368 } 369 370 void 371 adjust(struct inodesc *idesc, int lcnt) 372 { 373 struct dinode *dp; 374 caddr_t flow; 375 int saveiscorrupt; 376 struct inodesc lcidesc; 377 378 dp = ginode(idesc->id_number); 379 if (dp->di_nlink == lcnt) { 380 /* 381 * If we have not hit any unresolved problems, are running 382 * in preen mode, and are on a file system using logging, 383 * then just toss any partially allocated files, as they are 384 * an expected occurrence. 385 */ 386 if (!iscorrupt && preen && islog) { 387 clri(idesc, "UNREF", CLRI_VERBOSE, CLRI_NOP_OK); 388 return; 389 } else { 390 /* 391 * The file system can be considered clean even if 392 * a file is not linked up, but is cleared. In 393 * other words, the kernel won't panic over it. 394 * Hence, iscorrupt should not be set when 395 * linkup is answered no, but clri is answered yes. 396 * 397 * If neither is answered yes, then we have a 398 * non-panic-inducing known corruption that the 399 * user needs to be reminded of when we exit. 400 */ 401 saveiscorrupt = iscorrupt; 402 if (linkup(idesc->id_number, (fsck_ino_t)0, 403 NULL) == 0) { 404 iscorrupt = saveiscorrupt; 405 clri(idesc, "UNREF", CLRI_QUIET, CLRI_NOP_OK); 406 if (statemap[idesc->id_number] != USTATE) 407 iscorrupt = 1; 408 return; 409 } 410 dp = ginode(idesc->id_number); 411 } 412 lcnt = lncntp[idesc->id_number]; 413 } 414 415 /* 416 * It doesn't happen often, but it's possible to get a true 417 * excess of links (especially if a lot of directories got 418 * orphaned and reattached to lost+found). Instead of wrapping 419 * around, do something semi-useful (i.e., give progress towards 420 * a less-broken filesystem) when this happens. 421 */ 422 LINK_RANGE(flow, dp->di_nlink, -lcnt); 423 if (flow != NULL) { 424 LINK_CLEAR(flow, idesc->id_number, dp->di_mode, &lcidesc); 425 if (statemap[idesc->id_number] == USTATE) 426 return; 427 } 428 429 dp = ginode(idesc->id_number); 430 if (lcnt && dp->di_nlink != lcnt) { 431 pwarn("LINK COUNT %s", 432 file_id(idesc->id_number, dp->di_mode)); 433 pinode(idesc->id_number); 434 dp = ginode(idesc->id_number); 435 (void) printf(" COUNT %d SHOULD BE %d", 436 dp->di_nlink, dp->di_nlink - lcnt); 437 /* 438 * Even lost+found is subject to this, as whenever 439 * we modify it, we update both the in-memory and 440 * on-disk counts. Thus, they should still be in 441 * sync. 442 */ 443 if (preen) { 444 if (lcnt < 0) { 445 (void) printf("\n"); 446 if ((dp->di_mode & IFMT) == IFSHAD) 447 pwarn("LINK COUNT INCREASING"); 448 else 449 pfatal("LINK COUNT INCREASING"); 450 } 451 } 452 if (preen || reply("ADJUST") == 1) { 453 dp->di_nlink -= lcnt; 454 inodirty(); 455 if (preen) 456 (void) printf(" (ADJUSTED)\n"); 457 } else if (((dp->di_mode & IFMT) == IFDIR) || 458 ((dp->di_mode & IFMT) == IFATTRDIR)) { 459 /* 460 * File counts can be off relatively harmlessly, 461 * but a bad directory count can cause the 462 * kernel to lose its mind. 463 */ 464 iscorrupt = 1; 465 } 466 } 467 } 468 469 static int 470 mkentry(struct inodesc *idesc) 471 { 472 struct direct *dirp = idesc->id_dirp; 473 struct direct newent; 474 int newlen, oldlen; 475 476 newent.d_namlen = strlen(idesc->id_name); 477 newlen = DIRSIZ(&newent); 478 if (dirp->d_ino != 0) 479 oldlen = DIRSIZ(dirp); 480 else 481 oldlen = 0; 482 if ((int)dirp->d_reclen - oldlen < newlen) 483 return (KEEPON); 484 newent.d_reclen = dirp->d_reclen - (ushort_t)oldlen; 485 dirp->d_reclen = (ushort_t)oldlen; 486 487 /* LINTED dirp is aligned and DIRSIZ() forces oldlen to be aligned */ 488 dirp = (struct direct *)(((char *)dirp) + oldlen); 489 dirp->d_ino = idesc->id_parent; /* ino to be entered is in id_parent */ 490 dirp->d_reclen = newent.d_reclen; 491 dirp->d_namlen = newent.d_namlen; 492 (void) memmove(dirp->d_name, idesc->id_name, 493 (size_t)newent.d_namlen + 1); 494 495 return (ALTERED|STOP); 496 } 497 498 static int 499 chgino(struct inodesc *idesc) 500 { 501 struct direct *dirp = idesc->id_dirp; 502 503 if (memcmp(dirp->d_name, idesc->id_name, 504 (size_t)dirp->d_namlen + 1) != 0) 505 return (KEEPON); 506 dirp->d_ino = idesc->id_parent; 507 return (ALTERED|STOP); 508 } 509 510 int 511 linkup(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name) 512 { 513 int rval; 514 struct dinode *dp; 515 int lostdir; 516 int lostshadow; 517 fsck_ino_t oldlfdir; 518 fsck_ino_t *intree; 519 struct inodesc idesc; 520 521 init_inodesc(&idesc); 522 dp = ginode(orphan); 523 lostdir = (((dp->di_mode & IFMT) == IFDIR) || 524 ((dp->di_mode & IFMT) == IFATTRDIR)); 525 if (debug && lostdir && dp->di_nlink <= 0 && lncntp[orphan] == -1) 526 (void) printf( 527 "old fsck would have left inode %d for reclaim thread\n", 528 orphan); 529 lostshadow = (dp->di_mode & IFMT) == IFSHAD; 530 pwarn("UNREF %s ", file_id(orphan, dp->di_mode)); 531 pinode(orphan); 532 if (lostshadow || (dp->di_size == 0 && dp->di_oeftflag == 0)) 533 return (0); 534 if (!preen && (reply("RECONNECT") == 0)) 535 goto noconnect; 536 537 if (lfdir == 0) { 538 dp = ginode(UFSROOTINO); 539 idesc.id_name = lfname; 540 idesc.id_type = DATA; 541 idesc.id_func = findino; 542 idesc.id_number = UFSROOTINO; 543 idesc.id_fix = DONTKNOW; 544 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & FOUND) != 0) { 545 lfdir = idesc.id_parent; 546 } else { 547 pwarn("NO %s DIRECTORY", lfname); 548 if (preen || reply("CREATE") == 1) { 549 lfdir = newdir(UFSROOTINO, (fsck_ino_t)0, 550 lfmode, lfname); 551 if (lfdir != 0) { 552 if (preen) 553 (void) printf(" (CREATED)\n"); 554 else 555 (void) printf("\n"); 556 statemap[lfdir] |= INFOUND; 557 /* 558 * XXX What if we allocate an inode 559 * that's already been scanned? Then 560 * we need to leave lnctnp[] alone. 561 */ 562 TRACK_LNCNTP(UFSROOTINO, 563 lncntp[UFSROOTINO]++); 564 } 565 } 566 } 567 if (lfdir == 0) { 568 pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n", lfname); 569 pwarn("Could not reconnect inode %d\n", orphan); 570 goto noconnect; 571 } else { 572 /* 573 * We searched for it via the namespace, so by 574 * definition it's been found. We have to do this 575 * because it is possible that we're called before 576 * the full namespace mapping is complete (especially 577 * from pass 1, if it encounters a corrupt directory 578 * that has to be cleared). 579 */ 580 statemap[lfdir] |= INFOUND; 581 } 582 } 583 dp = ginode(lfdir); 584 if ((dp->di_mode & IFMT) != IFDIR) { 585 pfatal("%s IS NOT A DIRECTORY", lfname); 586 if (reply("REALLOCATE") == 0) { 587 iscorrupt = 1; 588 goto noconnect; 589 } 590 oldlfdir = lfdir; 591 lfdir = reallocdir(UFSROOTINO, (fsck_ino_t)0, lfmode, lfname); 592 if (lfdir == 0) { 593 iscorrupt = 1; 594 pfatal("SORRY. CANNOT CREATE %s DIRECTORY\n\n", 595 lfname); 596 goto noconnect; 597 } 598 inodirty(); 599 statemap[lfdir] |= INFOUND; 600 freeino(oldlfdir, TI_PARENT); 601 } 602 if (statemap[lfdir] != DFOUND) { 603 /* 604 * Not a consistency problem of the sort that'll 605 * cause the kernel heartburn, so don't set iscorrupt. 606 */ 607 if (debug) 608 (void) printf("lfdir %d is in state 0x%x\n", 609 lfdir, (int)statemap[lfdir]); 610 lfdir = 0; 611 pfatal("SORRY. %s DIRECTORY DISAPPEARED\n\n", lfname); 612 pwarn("Could not reconnect inode %d\n", orphan); 613 goto noconnect; 614 } 615 616 rval = do_reconnect(orphan, parentdir, name); 617 618 return (rval); 619 620 /* 621 * Leaving things unconnected is harmless as far as trying to 622 * use the filesystem later, so don't set iscorrupt yet (it's 623 * just lost blocks and inodes, after all). 624 * 625 * Lost directories get noted for reporting after all checks 626 * are done - they may get cleared later. 627 */ 628 noconnect: 629 if (lostdir) { 630 intree = tsearch((void *)orphan, &limbo_dirs, 631 ino_t_cmp); 632 if (intree == NULL) 633 errexit("linkup: out of memory"); 634 } 635 return (0); 636 } 637 638 /* 639 * Connect an orphaned inode to lost+found. 640 * 641 * Returns non-zero for success, zero for failure. 642 */ 643 static int 644 do_reconnect(fsck_ino_t orphan, fsck_ino_t parentdir, caddr_t name) 645 { 646 caddr_t flow_msg; 647 struct dinode *dp; 648 int lostdir; 649 mode_t mode; 650 fsck_ino_t *intree; 651 struct inodesc idesc; 652 653 dp = ginode(orphan); 654 mode = dp->di_mode & IFMT; 655 lostdir = (mode == IFDIR) || (mode == IFATTRDIR); 656 657 name = mkuniqname(name, lfname, lfdir, orphan); 658 if (name == NULL) 659 goto noconnect; 660 if (makeentry(lfdir, orphan, name) == 0) { 661 pfatal("SORRY. NO SPACE IN %s DIRECTORY\n", lfname); 662 pwarn("Could not reconnect inode %d\n", orphan); 663 goto noconnect; 664 } 665 666 dp = ginode(orphan); 667 LINK_RANGE(flow_msg, lncntp[orphan], -1); 668 if (flow_msg != NULL) { 669 LINK_CLEAR(flow_msg, orphan, dp->di_mode, &idesc); 670 if (statemap[orphan] == USTATE) 671 goto noconnect; 672 } 673 TRACK_LNCNTP(orphan, lncntp[orphan]--); 674 675 /* 676 * Make sure that anything we put into the normal namespace 677 * looks like it belongs there. Attributes can only be in 678 * attribute directories, not the normal directory lost+found. 679 */ 680 maybe_convert_attrdir_to_dir(orphan); 681 682 if (lostdir) { 683 /* 684 * Can't be creating a duplicate entry with makeentry(), 685 * because changeino() will succeed if ".." already 686 * exists. 687 */ 688 if ((changeino(orphan, "..", lfdir) & ALTERED) == 0 && 689 parentdir != (fsck_ino_t)-1) 690 (void) makeentry(orphan, lfdir, ".."); 691 /* 692 * If we were half-detached, don't try to get 693 * inode 0 later on. 694 */ 695 if (parentdir == 0) 696 parentdir = -1; 697 /* 698 * Fix up link counts. 699 * 700 * XXX This section is getting pretty byzantine, espcially 701 * when combined with changeino()/chgino()'s link manipulation. 702 */ 703 LFDIR_LINK_RANGE_RVAL(flow_msg, lncntp[lfdir], 1, &idesc, 0); 704 TRACK_LNCNTP(lfdir, lncntp[lfdir]--); 705 pwarn("DIR I=%lu CONNECTED. ", (long)orphan); 706 reattached_dir = 1; 707 if (parentdir != (fsck_ino_t)-1) { 708 /* 709 * Have to clear the parent's reference. Otherwise, 710 * if it's an orphan, then we may clear this orphan 711 * in pass 4 even though we've reconnected it. 712 * 713 * We already have the reference count 714 * allowing for a parent link, so undo the 715 * adjustment done above. Otherwise we come 716 * out high by one. 717 */ 718 (void) printf("PARENT WAS I=%lu\n", (long)parentdir); 719 (void) cleardirentry(parentdir, orphan); 720 } 721 if (!preen) 722 (void) printf("\n"); 723 } else if (preen) { 724 (void) printf(" (RECONNECTED)\n"); 725 } 726 727 statemap[orphan] &= ~INDELAYD; 728 return (1); 729 730 /* 731 * Leaving things unconnected is harmless as far as trying to 732 * use the filesystem later, so don't set iscorrupt yet (it's 733 * just lost blocks and inodes, after all). 734 * 735 * Lost directories get noted for reporting after all checks 736 * are done - they may get cleared later. 737 */ 738 noconnect: 739 if (lostdir) { 740 intree = tsearch((void *)orphan, &limbo_dirs, 741 ino_t_cmp); 742 if (intree == NULL) 743 errexit("linkup: out of memory"); 744 } 745 return (0); 746 } 747 748 /* 749 * fix an entry in a directory. 750 */ 751 int 752 changeino(fsck_ino_t dir, char *name, fsck_ino_t newnum) 753 { 754 struct inodesc idesc; 755 756 init_inodesc(&idesc); 757 idesc.id_type = DATA; 758 idesc.id_func = chgino; 759 idesc.id_number = dir; 760 idesc.id_fix = DONTKNOW; 761 idesc.id_name = name; 762 idesc.id_parent = newnum; /* new value for name */ 763 return (ckinode(ginode(dir), &idesc, CKI_TRAVERSE)); 764 } 765 766 /* 767 * make an entry in a directory 768 */ 769 int 770 makeentry(fsck_ino_t parent, fsck_ino_t ino, char *name) 771 { 772 int repeat; 773 struct dinode *dp; 774 struct inoinfo *iip; 775 struct inodesc idesc; 776 char pathbuf[MAXPATHLEN + 1]; 777 778 if (parent < UFSROOTINO || parent >= maxino || 779 ino < UFSROOTINO || ino >= maxino) 780 return (0); 781 init_inodesc(&idesc); 782 idesc.id_type = DATA; 783 idesc.id_func = mkentry; 784 idesc.id_number = parent; 785 idesc.id_parent = ino; /* this is the inode to enter */ 786 idesc.id_fix = DONTKNOW; 787 idesc.id_name = name; 788 789 repeat = 0; 790 again: 791 dp = ginode(parent); 792 if ((dp->di_size % DIRBLKSIZ) != 0) { 793 dp->di_size = roundup(dp->di_size, DIRBLKSIZ); 794 inodirty(); 795 796 iip = getinoinfo(ino); 797 if (iip != NULL) 798 iip->i_isize = dp->di_size; 799 } 800 801 if ((ckinode(dp, &idesc, CKI_TRAVERSE) & ALTERED) != 0) { 802 iip = getinoinfo(ino); 803 if (iip != NULL) 804 iip->i_isize = dp->di_size; 805 806 return (1); 807 } 808 809 if (repeat == 0) { 810 getpathname(pathbuf, parent, parent); 811 if (expanddir(parent, pathbuf) == 0) 812 return (0); 813 814 repeat = 1; 815 goto again; 816 } 817 818 return (0); 819 } 820 821 /* 822 * Attempt to expand the size of a directory 823 */ 824 static int 825 expanddir(fsck_ino_t ino, char *name) 826 { 827 struct bufarea *bpback, *bp[2]; 828 daddr32_t nxtibn, nxtbn; 829 daddr32_t newblk[2]; 830 struct dinode *dp; 831 char *cp; 832 int bc, f; 833 int n; 834 int allocIndir; 835 int frag2blks; 836 int lffragsz = 0; 837 int c = 0; 838 int retval = 0; 839 840 bp[0] = bp[1] = NULL; 841 842 dp = ginode(ino); 843 if (dp->di_size == 0) { 844 goto bail; 845 } 846 847 nxtbn = lblkno(&sblock, dp->di_size - 1) + 1; 848 849 /* 850 * Check that none of the nominally in-use direct block 851 * addresses for the directory are bogus. 852 */ 853 for (bc = 0; ((nxtbn > 0) && (bc < nxtbn) && (bc < NDADDR)); bc++) { 854 if (dp->di_db[bc] == 0) { 855 goto bail; 856 } 857 } 858 859 /* 860 * Determine our data block allocation needs. We always need to 861 * allocate at least one data block. We may need a second, the 862 * indirect block itself. 863 */ 864 allocIndir = 0; 865 nxtibn = -1; 866 n = 0; 867 868 if (nxtbn <= NDADDR) { 869 /* 870 * Still in direct blocks. Check for the unlikely 871 * case where the last block is a frag rather than 872 * a full block. This would only happen if someone had 873 * created a file in lost+found, and then that caused 874 * the dynamic directory shrinking capabilities of ufs 875 * to kick in. 876 * 877 * Note that we test nxtbn <= NDADDR, as it's the 878 * next block (i.e., one greater than the current/ 879 * actual block being examined). 880 */ 881 lffragsz = dp->di_size % sblock.fs_bsize; 882 } 883 884 if (nxtbn >= NDADDR && !lffragsz) { 885 n = sblock.fs_bsize / sizeof (daddr32_t); 886 nxtibn = nxtbn - NDADDR; 887 /* 888 * Only go one level of indirection 889 */ 890 if (nxtibn >= n) { 891 goto bail; 892 } 893 /* 894 * First indirect block means we need to pick up 895 * the actual indirect pointer block as well. 896 */ 897 if (nxtibn == 0) 898 allocIndir++; 899 } 900 901 /* 902 * Allocate all the new blocks we need. 903 */ 904 if ((newblk[0] = allocblk(sblock.fs_frag)) == 0) { 905 goto bail; 906 } 907 c++; 908 if (allocIndir) { 909 if ((newblk[1] = allocblk(sblock.fs_frag)) == 0) { 910 goto bail; 911 } 912 c++; 913 } 914 915 /* 916 * Take care of the block that will hold new directory entries. 917 * This one is always allocated. 918 */ 919 bp[0] = getdirblk(newblk[0], (size_t)sblock.fs_bsize); 920 if (bp[0]->b_errs) { 921 goto bail; 922 } 923 924 if (lffragsz) { 925 /* 926 * Preserve the partially-populated existing directory. 927 */ 928 bpback = getdirblk(dp->di_db[nxtbn - 1], 929 (size_t)dblksize(&sblock, dp, nxtbn - 1)); 930 if (!bpback->b_errs) { 931 (void) memmove(bp[0]->b_un.b_buf, bpback->b_un.b_buf, 932 (size_t)lffragsz); 933 } 934 } 935 936 /* 937 * Initialize the new fragments. lffragsz is zero if this 938 * is a completely-new block. 939 */ 940 for (cp = &(bp[0]->b_un.b_buf[lffragsz]); 941 cp < &(bp[0]->b_un.b_buf[sblock.fs_bsize]); 942 cp += DIRBLKSIZ) { 943 (void) memmove(cp, (char *)&emptydir, 944 sizeof (emptydir)); 945 } 946 dirty(bp[0]); 947 948 /* 949 * If we allocated the indirect block, zero it out. Otherwise 950 * read it in if we're using one. 951 */ 952 if (allocIndir) { 953 bp[1] = getdatablk(newblk[1], (size_t)sblock.fs_bsize); 954 if (bp[1]->b_errs) { 955 goto bail; 956 } 957 (void) memset(bp[1]->b_un.b_buf, 0, sblock.fs_bsize); 958 dirty(bp[1]); 959 } else if (nxtibn >= 0) { 960 /* Check that the indirect block pointer looks okay */ 961 if (dp->di_ib[0] == 0) { 962 goto bail; 963 } 964 bp[1] = getdatablk(dp->di_ib[0], (size_t)sblock.fs_bsize); 965 if (bp[1]->b_errs) { 966 goto bail; 967 } 968 969 for (bc = 0; ((bc < nxtibn) && (bc < n)); bc++) { 970 /* LINTED pointer cast alignment */ 971 if (((daddr32_t *)bp[1]->b_un.b_buf)[bc] == 0) { 972 goto bail; 973 } 974 } 975 } 976 977 /* 978 * Since the filesystem's consistency isn't affected by 979 * whether or not we actually do the expansion, iscorrupt 980 * is left alone for any of the approval paths. 981 */ 982 pwarn("NO SPACE LEFT IN %s", name); 983 if (!preen && (reply("EXPAND") == 0)) 984 goto bail; 985 986 /* 987 * Now that everything we need is gathered up and the 988 * necessary approvals acquired, we can make our provisional 989 * changes permanent. 990 */ 991 992 if (lffragsz) { 993 /* 994 * We've saved the data from the old end fragment(s) in 995 * our new block, so we can just swap the new one in. 996 * Make sure the size reflects the expansion of the 997 * final fragments/block. 998 */ 999 frag2blks = roundup(lffragsz, sblock.fs_fsize); 1000 freeblk(ino, dp->di_db[nxtbn - 1], 1001 frag2blks / sblock.fs_fsize); 1002 frag2blks = btodb(frag2blks); 1003 dp->di_size -= (u_offset_t)lffragsz; 1004 dp->di_blocks = dp->di_blocks - frag2blks; 1005 dp->di_db[nxtbn - 1] = newblk[0]; 1006 dp->di_size += (u_offset_t)sblock.fs_bsize; 1007 dp->di_blocks += btodb(sblock.fs_bsize); 1008 inodirty(); 1009 retval = 1; 1010 goto done; 1011 } 1012 1013 /* 1014 * Full-block addition's much easier. It's just an append. 1015 */ 1016 dp->di_size += (u_offset_t)sblock.fs_bsize; 1017 dp->di_blocks += btodb(sblock.fs_bsize); 1018 if (allocIndir) { 1019 dp->di_blocks += btodb(sblock.fs_bsize); 1020 } 1021 1022 inodirty(); 1023 if (nxtibn < 0) { 1024 /* 1025 * Still in direct blocks 1026 */ 1027 dp->di_db[nxtbn] = newblk[0]; 1028 } else { 1029 /* 1030 * Last indirect is always going to point at the 1031 * new directory buffer 1032 */ 1033 if (allocIndir) 1034 dp->di_ib[0] = newblk[1]; 1035 /* LINTED pointer case alignment */ 1036 ((daddr32_t *)bp[1]->b_un.b_buf)[nxtibn] = newblk[0]; 1037 dirty(bp[1]); 1038 } 1039 1040 if (preen) 1041 (void) printf(" (EXPANDED)\n"); 1042 1043 retval = 1; 1044 goto done; 1045 1046 bail: 1047 for (f = 0; f < c; f++) 1048 freeblk(ino, newblk[f], sblock.fs_frag); 1049 done: 1050 /* 1051 * bp[0] is handled by the directory cache's auto-release. 1052 */ 1053 if (bp[1] != NULL) 1054 brelse(bp[1]); 1055 1056 return (retval); 1057 } 1058 1059 static fsck_ino_t 1060 newdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name) 1061 { 1062 fsck_ino_t dino; 1063 char pname[BUFSIZ]; 1064 1065 /* 1066 * This function creates a new directory and populates it with 1067 * "." and "..", then links to it as NAME in PARENT. 1068 */ 1069 dino = allocdir(parent, request, mode, 1); 1070 if (dino != 0) { 1071 getpathname(pname, parent, parent); 1072 name = mkuniqname(name, pname, parent, dino); 1073 /* 1074 * We don't touch numdirs, because it's just a cache of 1075 * what the filesystem claimed originally and is used 1076 * to calculate hash keys. 1077 */ 1078 if (makeentry(parent, dino, name) == 0) { 1079 freedir(dino, parent); 1080 dino = 0; 1081 } 1082 } 1083 1084 return (dino); 1085 } 1086 1087 /* 1088 * Replace whatever NAME refers to in PARENT with a new directory. 1089 * Note that if the old inode REQUEST is a directory, all of its 1090 * contents will be freed and reaped. 1091 */ 1092 static fsck_ino_t 1093 reallocdir(fsck_ino_t parent, fsck_ino_t request, int mode, caddr_t name) 1094 { 1095 int retval; 1096 fsck_ino_t newino; 1097 1098 if ((request != 0) && (statemap[request] != USTATE)) 1099 freeino(request, TI_PARENT); 1100 1101 newino = allocdir(parent, request, mode, 0); 1102 if (newino != 0) { 1103 retval = changeino(parent, name, newino); 1104 if ((retval & ALTERED) == 0) { 1105 /* 1106 * No change made, so name doesn't exist, so 1107 * unwind allocation rather than leak it. 1108 */ 1109 freedir(newino, parent); 1110 newino = 0; 1111 } 1112 } 1113 1114 return (newino); 1115 } 1116 1117 /* 1118 * allocate a new directory 1119 */ 1120 fsck_ino_t 1121 allocdir(fsck_ino_t parent, fsck_ino_t request, int mode, int update_parent) 1122 { 1123 fsck_ino_t ino; 1124 caddr_t cp; 1125 caddr_t flow; 1126 struct dinode *dp; 1127 struct bufarea *bp; 1128 struct inoinfo *inp; 1129 struct inodesc idesc; 1130 struct dirtemplate *dirp; 1131 1132 ino = allocino(request, IFDIR|mode); 1133 if (ino == 0) 1134 return (0); 1135 dirp = &dirhead; 1136 dirp->dot_ino = ino; 1137 dirp->dotdot_ino = parent; 1138 dp = ginode(ino); 1139 bp = getdirblk(dp->di_db[0], (size_t)sblock.fs_fsize); 1140 if (bp->b_errs) { 1141 freeino(ino, TI_PARENT); 1142 return (0); 1143 } 1144 (void) memmove(bp->b_un.b_buf, (void *)dirp, 1145 sizeof (struct dirtemplate)); 1146 for (cp = &bp->b_un.b_buf[DIRBLKSIZ]; 1147 cp < &bp->b_un.b_buf[sblock.fs_fsize]; 1148 cp += DIRBLKSIZ) 1149 (void) memmove(cp, (void *)&emptydir, sizeof (emptydir)); 1150 dirty(bp); 1151 dp->di_nlink = 2; 1152 inodirty(); 1153 if (!inocached(ino)) { 1154 cacheino(dp, ino); 1155 } else { 1156 /* 1157 * re-using an old directory inode 1158 */ 1159 inp = getinoinfo(ino); 1160 if (inp == NULL) { 1161 if (debug) 1162 errexit("allocdir got NULL from getinoinfo " 1163 "for existing entry I=%d\n", 1164 ino); 1165 cacheino(dp, ino); 1166 } else { 1167 init_inoinfo(inp, dp, ino); 1168 inp->i_parent = parent; 1169 inp->i_dotdot = parent; 1170 } 1171 } 1172 1173 /* 1174 * Short-circuit all the dancing around below if it's the 1175 * root inode. The net effect's the same. 1176 */ 1177 if (ino == UFSROOTINO) { 1178 TRACK_LNCNTP(ino, lncntp[ino] = dp->di_nlink); 1179 return (ino); 1180 } 1181 1182 if (!update_parent) 1183 return (ino); 1184 1185 /* 1186 * We never create attribute directories, which can have 1187 * non-directory parents. So, the parent of the directory 1188 * we're creating must itself be a directory. 1189 */ 1190 if (!INO_IS_DVALID(parent)) { 1191 freeino(ino, TI_PARENT); 1192 return (0); 1193 } 1194 1195 /* 1196 * Make sure the parent can handle another link. 1197 * Since we might only update one version of the 1198 * count (disk versus in-memory), we have to check both. 1199 */ 1200 LINK_RANGE(flow, lncntp[parent], -1); 1201 if (flow == NULL) 1202 LINK_RANGE(flow, (int)dp->di_nlink, 1); 1203 1204 if (flow != NULL) { 1205 LINK_CLEAR(flow, parent, dp->di_mode, &idesc); 1206 if (statemap[parent] == USTATE) { 1207 /* 1208 * No parent any more, so bail out. Callers 1209 * are expected to handle this possibility. 1210 * Since most just throw up their hands if 1211 * we return 0, this just happens to work. 1212 */ 1213 freeino(ino, TI_PARENT); 1214 return (0); 1215 } 1216 } 1217 1218 /* 1219 * We've created a directory with two entries, "." and "..", 1220 * and a link count of two ("." and one from its parent). If 1221 * the parent's not been scanned yet, which means this inode 1222 * will get scanned later as well, then make our in-core count 1223 * match what we pushed out to disk. Similarly, update the 1224 * parent. On the other hand, if the parent's already been 1225 * looked at (statemap[ino] == DFOUND), the discrepancy 1226 * between lncntp[] and di_nlink will be noted later, with 1227 * appropriate reporting and propagation, in pass2. 1228 * 1229 * We're explicitly skipping where the parent was DZLINK or 1230 * DFOUND. If it has zero links, it can't be gotten to, so 1231 * we want a discrepancy set up that will be caught in pass2. 1232 * DFOUND was discussed above. 1233 * 1234 * Regarding the claim of a link from the parent: we've not 1235 * done anything to create such a link here. We depend on the 1236 * semantics of our callers attaching the inode we return to 1237 * an existing entry in the directory or creating the entry 1238 * themselves, but in either case, not modifying the link 1239 * count. 1240 * 1241 * Note that setting lncntp[ino] to zero means that both claimed 1242 * links have been ``found''. 1243 */ 1244 statemap[ino] = statemap[parent]; 1245 if (INO_IS_DVALID(parent)) { 1246 TRACK_LNCNTP(ino, lncntp[ino] = 0); 1247 TRACK_LNCNTP(parent, lncntp[parent]--); 1248 } 1249 dp = ginode(parent); 1250 dp->di_nlink++; 1251 inodirty(); 1252 return (ino); 1253 } 1254 1255 /* 1256 * free a directory inode 1257 */ 1258 static void 1259 freedir(fsck_ino_t ino, fsck_ino_t parent) 1260 { 1261 struct inoinfo *iip; 1262 1263 if (ino != parent) { 1264 /* 1265 * Make sure that the desired parent gets a link 1266 * count update from freeino()/truncino(). If 1267 * we can't look it up, then it's not really a 1268 * directory, so there's nothing to worry about. 1269 */ 1270 iip = getinoinfo(ino); 1271 if (iip != NULL) 1272 iip->i_parent = parent; 1273 } 1274 freeino(ino, TI_PARENT); 1275 } 1276 1277 /* 1278 * generate a temporary name for use in the lost+found directory. 1279 */ 1280 static void 1281 lftempname(char *bufp, fsck_ino_t ino) 1282 { 1283 fsck_ino_t in; 1284 caddr_t cp; 1285 int namlen; 1286 1287 cp = bufp + 2; 1288 for (in = maxino; in > 0; in /= 10) 1289 cp++; 1290 *--cp = '\0'; 1291 /* LINTED difference will not overflow an int */ 1292 namlen = cp - bufp; 1293 if ((namlen > BUFSIZ) || (namlen > MAXPATHLEN)) { 1294 errexit("buffer overflow in lftempname()\n"); 1295 } 1296 1297 in = ino; 1298 while (cp > bufp) { 1299 *--cp = (in % 10) + '0'; 1300 in /= 10; 1301 } 1302 *cp = '#'; 1303 } 1304 1305 /* 1306 * Get a directory block. 1307 * Insure that it is held until another is requested. 1308 * 1309 * Our callers are expected to check for errors and/or be 1310 * prepared to handle blocks of zeros in the middle of a 1311 * directory. 1312 */ 1313 static struct bufarea * 1314 getdirblk(daddr32_t blkno, size_t size) 1315 { 1316 if (pdirbp != 0) { 1317 brelse(pdirbp); 1318 } 1319 pdirbp = getdatablk(blkno, size); 1320 return (pdirbp); 1321 } 1322 1323 /* 1324 * Create a unique name for INODE to be created in directory PARENT. 1325 * Use NAME if it is provided (non-NULL) and doesn't already exist. 1326 * Returning NULL indicates no unique name could be generated. 1327 * 1328 * If we were given a name, and it conflicts with an existing 1329 * entry, use our usual temp name instead. Without this check, 1330 * we could end up creating duplicate entries for multiple 1331 * orphaned directories in lost+found with the same name (but 1332 * different parents). Of course, our usual name might already 1333 * be in use as well, so be paranoid. 1334 * 1335 * We could do something like keep tacking something onto the 1336 * end of tempname until we come up with something that's not 1337 * in use, but that has liabilities as well. This is a 1338 * sufficiently rare case that it's not worth going that 1339 * overboard for. 1340 */ 1341 static caddr_t 1342 mkuniqname(caddr_t name, caddr_t pname, fsck_ino_t parent, fsck_ino_t inode) 1343 { 1344 fsck_ino_t oldino; 1345 struct dinode *dp; 1346 caddr_t flow_msg; 1347 struct inodesc idesc; 1348 static char tempname[BUFSIZ]; 1349 1350 lftempname(tempname, inode); 1351 if ((name != NULL) && 1352 (lookup_named_ino(parent, name) != 0)) { 1353 name = NULL; 1354 } 1355 if (name == NULL) { 1356 /* 1357 * No name given, or it wasn't unique. 1358 */ 1359 name = tempname; 1360 if ((oldino = lookup_named_ino(parent, name)) != 0) { 1361 pfatal( 1362 "Name ``%s'' for inode %d already exists in %s \n", 1363 name, oldino, pname); 1364 if (reply("REMOVE OLD ENTRY") == 0) { 1365 if (parent == lfdir) 1366 pwarn( 1367 "Could not reconnect inode %d\n\n", 1368 inode); 1369 else 1370 pwarn( 1371 "Could not create entry for %d\n\n", 1372 inode); 1373 name = NULL; 1374 goto noconnect; 1375 } 1376 (void) changeino(parent, name, inode); 1377 LINK_RANGE(flow_msg, lncntp[oldino], 1); 1378 if (flow_msg != NULL) { 1379 /* 1380 * Do a best-effort, but if we're not 1381 * allowed to do the clear, the fs is 1382 * corrupt in any case, so just carry on. 1383 */ 1384 dp = ginode(oldino); 1385 LINK_CLEAR(flow_msg, oldino, dp->di_mode, 1386 &idesc); 1387 if (statemap[oldino] != USTATE) 1388 iscorrupt = 1; 1389 } else { 1390 TRACK_LNCNTP(oldino, lncntp[oldino]++); 1391 } 1392 } 1393 } 1394 1395 noconnect: 1396 return (name); 1397 } 1398