1 /*
2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
6 /* All Rights Reserved */
7
8 /*
9 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms are permitted
13 * provided that: (1) source distributions retain this entire copyright
14 * notice and comment, and (2) distributions including binaries display
15 * the following acknowledgement: ``This product includes software
16 * developed by the University of California, Berkeley and its contributors''
17 * in the documentation or other materials provided with the distribution
18 * and in all advertising materials mentioning features or use of this
19 * software. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 */
26
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <time.h>
33 #include <limits.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/sysmacros.h>
37 #include <sys/mntent.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_inode.h>
40 #include <sys/fs/ufs_fs.h>
41 #define _KERNEL
42 #include <sys/fs/ufs_fsdir.h>
43 #undef _KERNEL
44 #include <pwd.h>
45 #include "fsck.h"
46
/*
 * File-scope state defined by this file.
 *
 * NOTE(review): of these, only aclphead and aclpsort are referenced in
 * the part of the file examined here (by cacheacl()); the purpose of
 * the others should be confirmed against their users.
 */
uint_t largefile_count = 0;
fsck_ino_t lastino;
struct bufarea cgblk;
/*
 * ACL inode cache: aclphead is indexed by (inode number % numacls) and
 * holds the hash chains; aclpsort lists the same entries in the order
 * they were added.
 */
struct inoinfo **aclphead, **aclpsort;
struct dinode zino;

/* Forward declarations of the file-local helpers. */
static int get_indir_offsets(int, daddr_t, int *, int *);
static int clearanentry(struct inodesc *);
static void pdinode(struct dinode *);
static void inoflush(void);
static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
static int lookup_dotdot_ino(fsck_ino_t);
62
63 /*
64 * ckinode() essentially traverses the blocklist of the provided
65 * inode. For each block either the caller-supplied callback (id_func
66 * in the provided struct inodesc) or dirscan() is invoked. Which is
67 * chosen is controlled by what type of traversal was requested
68 * (id_type) - if it was for an ADDR or ACL, use the callback,
69 * otherwise it is assumed to be DATA (i.e., a directory) whose
70 * contents need to be scanned.
71 *
72 * Note that a directory inode can get passed in with a type of ADDR;
73 * the type field is orthogonal to the IFMT value. This is so that
74 * the file aspects (no duplicate blocks, etc) of a directory can be
75 * verified just like is done for any other file, or the actual
76 * contents can be scanned so that connectivity and such can be
77 * investigated.
78 *
79 * The traversal is controlled by flags in the return value of
80 * dirscan() or the callback. Five flags are defined, STOP, SKIP,
81 * KEEPON, ALTERED, and FOUND. Their semantics are:
82 *
83 * STOP - no further processing of this inode is desired/possible/
84 * feasible/etc. This can mean that whatever the scan
85 * was searching for was found, or a serious
86 * inconsistency was encountered, or anything else
87 * appropriate.
88 *
89 * SKIP - something that made it impossible to continue was
90 * encountered, and the caller should go on to the next
91 * inode. This is more for i/o failures than for
92 * logical inconsistencies. Nothing actually looks for
93 * this.
94 *
95 * KEEPON - no more blocks of this inode need to be scanned, but
96 * nothing's wrong, so keep on going with the next
97 * inode. It is similar to STOP, except that
98 * ckinode()'s caller will typically advance to the next
99 * inode for KEEPON, whereas it ceases scanning through
100 * the inodes completely for STOP.
101 *
102 * ALTERED - a change was made to the inode. If the caller sees
103 * this set, it should make sure to flush out the
104 * changes. Note that any data blocks read in by the
105 * function need to be marked dirty by it directly;
106 * flushing of those will happen automatically later.
107 *
108 * FOUND - whatever was being searched for was located.
109 * Typically combined with STOP to avoid wasting time
110 * doing additional looking.
111 *
112 * During a traversal, some state needs to be carried around. At the
113 * least, the callback functions need to know what inode they're
114 * working on, which logical block, and whether or not fixing problems
115 * when they're encountered is desired. Rather than try to guess what
116 * else might be needed (and thus end up passing way more arguments
117 * than is reasonable), all the possibilities have been bundled in
118 * struct inodesc. About half of the fields are specific to directory
119 * traversals, and the rest are pretty much generic to any traversal.
120 *
121 * The general fields are:
122 *
123 * id_fix What to do when an error is found. Generally, this
124 * is set to DONTKNOW before a traversal. If a
125 * problem is encountered, it is changed to either FIX
126 * or NOFIX by the dofix() query function. If id_fix
127 * has already been set to FIX when dofix() is called, then
128 * it includes the ALTERED flag (see above) in its return
129 * value; the net effect is that the inode's buffer
130 * will get marked dirty and written to disk at some
131 * point. If id_fix is DONTKNOW, then dofix() will
132 * query the user. If it is NOFIX, then dofix()
133 * essentially does nothing. A few routines set NOFIX
134 * as the initial value, as they are performing a best-
135 * effort informational task, rather than an actual
136 * repair operation.
137 *
138 * id_func This is the function that will be called for every
139 * logical block in the file (assuming id_type is not
140 * DATA). The logical block may represent a hole, so
141 * the callback needs to be prepared to handle that
142 * case. Its return value is a combination of the flags
143 * described above (SKIP, ALTERED, etc).
144 *
145 * id_number The inode number whose block list or data is being
146 * scanned.
147 *
148 * id_parent When id_type is DATA, this is the inode number for
149 * the parent of id_number. Otherwise, it is
150 * available for use as an extra parameter or return
151 * value between the callback and ckinode()'s caller.
152 * Which, if either, of those is left completely up to
153 * the two routines involved, so nothing can generally
154 * be assumed about the id_parent value for non-DATA
155 * traversals.
156 *
157 * id_lbn This is the current logical block (not fragment)
158 * number being visited by the traversal.
159 *
160 * id_blkno This is the physical block corresponding to id_lbn.
161 *
162 * id_numfrags This defines how large a block is being processed in
163 * this particular invocation of the callback.
164 * Usually, it will be the same as sblock.fs_frag.
165 * However, if a direct block is being processed and
166 * it is less than a full filesystem block,
167 * id_numfrags will indicate just how many fragments
168 * (starting from id_lbn) are actually part of the
169 * file.
170 *
171 * id_truncto The pass 4 callback is used in several places to
172 * free the blocks of a file (the `FILE HAS PROBLEM
173 * FOO; CLEAR?' scenario). This has been generalized
174 * to allow truncating a file to a particular length
175 * rather than always completely discarding it. If
176 * id_truncto is -1, then the entire file is released,
 *                 otherwise it is the logical block number to truncate
178 * to. This generalized interface was motivated by a
179 * desire to be able to discard everything after a
180 * hole in a directory, rather than the entire
181 * directory.
182 *
183 * id_type Selects the type of traversal. DATA for dirscan(),
184 * ADDR or ACL for using the provided callback.
185 *
186 * There are several more fields used just for dirscan() traversals:
187 *
188 * id_filesize The number of bytes in the overall directory left to
189 * process.
190 *
191 * id_loc Byte position within the directory block. Should always
192 * point to the start of a directory entry.
193 *
194 * id_entryno Which logical directory entry is being processed (0
195 * is `.', 1 is `..', 2 and on are normal entries).
196 * This field is primarily used to enable special
197 * checks when looking at the first two entries.
198 *
199 * The exception (there's always an exception in fsck)
200 * is that in pass 1, it tracks how many fragments are
201 * being used by a particular inode.
202 *
203 * id_firsthole The first logical block number that was found to
204 * be zero. As directories are not supposed to have
205 * holes, this marks where a directory should be
206 * truncated down to. A value of -1 indicates that
207 * no holes were found.
208 *
209 * id_dirp A pointer to the in-memory copy of the current
210 * directory entry (as identified by id_loc).
211 *
212 * id_name This is a directory entry name to either create
213 * (callback is mkentry) or locate (callback is
214 * chgino, findino, or findname).
215 */
216 int
ckinode(struct dinode * dp,struct inodesc * idesc,enum cki_action action)217 ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
218 {
219 struct inodesc cleardesc;
220 mode_t mode;
221
222 if (idesc->id_filesize == 0)
223 idesc->id_filesize = (offset_t)dp->di_size;
224
225 /*
226 * Our caller should be filtering out completely-free inodes
227 * (mode == zero), so we'll work on the assumption that what
228 * we're given has some basic validity.
229 *
230 * The kernel is inconsistent about MAXPATHLEN including the
231 * trailing \0, so allow the more-generous length for symlinks.
232 */
233 mode = dp->di_mode & IFMT;
234 if (mode == IFBLK || mode == IFCHR)
235 return (KEEPON);
236 if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
237 pwarn("I=%d Symlink longer than supported maximum\n",
238 idesc->id_number);
239 init_inodesc(&cleardesc);
240 cleardesc.id_type = ADDR;
241 cleardesc.id_number = idesc->id_number;
242 cleardesc.id_fix = DONTKNOW;
243 clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
244 return (STOP);
245 }
246 return (ckinode_common(dp, idesc, action));
247 }
248
249 /*
250 * This was split out from ckinode() to allow it to be used
251 * without having to pass in kludge flags to suppress the
252 * wrong-for-deletion initialization and irrelevant checks.
253 * This feature is no longer needed, but is being kept in case
254 * the need comes back.
255 */
256 static int
ckinode_common(struct dinode * dp,struct inodesc * idesc,enum cki_action action)257 ckinode_common(struct dinode *dp, struct inodesc *idesc,
258 enum cki_action action)
259 {
260 offset_t offset;
261 struct dinode dino;
262 daddr_t ndb;
263 int indir_data_blks, last_indir_blk;
264 int ret, i, frags;
265
266 (void) memmove(&dino, dp, sizeof (struct dinode));
267 ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
268
269 for (i = 0; i < NDADDR; i++) {
270 idesc->id_lbn++;
271 offset = blkoff(&sblock, dino.di_size);
272 if ((--ndb == 0) && (offset != 0)) {
273 idesc->id_numfrags =
274 numfrags(&sblock, fragroundup(&sblock, offset));
275 } else {
276 idesc->id_numfrags = sblock.fs_frag;
277 }
278 if (dino.di_db[i] == 0) {
279 if ((ndb > 0) && (idesc->id_firsthole < 0)) {
280 idesc->id_firsthole = i;
281 }
282 continue;
283 }
284 idesc->id_blkno = dino.di_db[i];
285 if (idesc->id_type == ADDR || idesc->id_type == ACL)
286 ret = (*idesc->id_func)(idesc);
287 else
288 ret = dirscan(idesc);
289
290 /*
291 * Need to clear the entry, now that we're done with
292 * it. We depend on freeblk() ignoring a request to
293 * free already-free fragments to handle the problem of
294 * a partial block.
295 */
296 if ((action == CKI_TRUNCATE) &&
297 (idesc->id_truncto >= 0) &&
298 (idesc->id_lbn >= idesc->id_truncto)) {
299 dp = ginode(idesc->id_number);
300 /*
301 * The (int) cast is safe, in that if di_size won't
302 * fit, it'll be a multiple of any legal fs_frag,
303 * thus giving a zero result. That value, in turn
304 * means we're doing an entire block.
305 */
306 frags = howmany((int)dp->di_size, sblock.fs_fsize) %
307 sblock.fs_frag;
308 if (frags == 0)
309 frags = sblock.fs_frag;
310 freeblk(idesc->id_number, dp->di_db[i],
311 frags);
312 dp = ginode(idesc->id_number);
313 dp->di_db[i] = 0;
314 inodirty();
315 ret |= ALTERED;
316 }
317
318 if (ret & STOP)
319 return (ret);
320 }
321
322 #ifdef lint
323 /*
324 * Cure a lint complaint of ``possible use before set''.
325 * Apparently it can't quite figure out the switch statement.
326 */
327 indir_data_blks = 0;
328 #endif
329 /*
330 * indir_data_blks contains the number of data blocks in all
331 * the previous levels for this iteration. E.g., for the
332 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
333 * of blocks have already been covered by the direct blocks
334 * (di_db[]). At the triple indirect level (i = NIADDR - 1),
335 * it is all of the number of data blocks that were covered
336 * by the second indirect, single indirect, and direct block
337 * levels.
338 */
339 idesc->id_numfrags = sblock.fs_frag;
340 ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
341 for (i = 0; i < NIADDR; i++) {
342 (void) get_indir_offsets(i, ndb, &indir_data_blks,
343 &last_indir_blk);
344 if (dino.di_ib[i] != 0) {
345 /*
346 * We'll only clear di_ib[i] if the first entry (and
347 * therefore all of them) is to be cleared, since we
348 * only go through this code on the first entry of
349 * each level of indirection. The +1 is to account
350 * for the fact that we don't modify id_lbn until
351 * we actually start processing on a data block.
352 */
353 idesc->id_blkno = dino.di_ib[i];
354 ret = iblock(idesc, i + 1,
355 (u_offset_t)howmany(dino.di_size,
356 (u_offset_t)sblock.fs_bsize) - indir_data_blks,
357 action);
358 if ((action == CKI_TRUNCATE) &&
359 (idesc->id_truncto <= indir_data_blks) &&
360 ((idesc->id_lbn + 1) >= indir_data_blks) &&
361 ((idesc->id_lbn + 1) <= last_indir_blk)) {
362 dp = ginode(idesc->id_number);
363 if (dp->di_ib[i] != 0) {
364 freeblk(idesc->id_number, dp->di_ib[i],
365 sblock.fs_frag);
366 }
367 }
368 if (ret & STOP)
369 return (ret);
370 } else {
371 /*
372 * Need to know which of the file's logical blocks
373 * reside in the missing indirect block. However, the
374 * precise location is only needed for truncating
375 * directories, and level-of-indirection precision is
376 * sufficient for that.
377 */
378 if ((indir_data_blks < ndb) &&
379 (idesc->id_firsthole < 0)) {
380 idesc->id_firsthole = indir_data_blks;
381 }
382 }
383 }
384 return (KEEPON);
385 }
386
387 static int
get_indir_offsets(int ilevel_wanted,daddr_t ndb,int * data_blks,int * last_blk)388 get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
389 int *last_blk)
390 {
391 int ndb_ilevel = -1;
392 int ilevel;
393 int dblks, lblk;
394
395 for (ilevel = 0; ilevel < NIADDR; ilevel++) {
396 switch (ilevel) {
397 case 0: /* SINGLE */
398 dblks = NDADDR;
399 lblk = dblks + NINDIR(&sblock) - 1;
400 break;
401 case 1: /* DOUBLE */
402 dblks = NDADDR + NINDIR(&sblock);
403 lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
404 break;
405 case 2: /* TRIPLE */
406 dblks = NDADDR + NINDIR(&sblock) +
407 (NINDIR(&sblock) * NINDIR(&sblock));
408 lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
409 NINDIR(&sblock)) - 1;
410 break;
411 default:
412 exitstat = EXERRFATAL;
413 /*
414 * Translate from zero-based array to
415 * one-based human-style counting.
416 */
417 errexit("panic: indirection level %d not 1, 2, or 3",
418 ilevel + 1);
419 /* NOTREACHED */
420 }
421
422 if (dblks < ndb && ndb <= lblk)
423 ndb_ilevel = ilevel;
424
425 if (ilevel == ilevel_wanted) {
426 if (data_blks != NULL)
427 *data_blks = dblks;
428 if (last_blk != NULL)
429 *last_blk = lblk;
430 }
431 }
432
433 return (ndb_ilevel);
434 }
435
436 static int
iblock(struct inodesc * idesc,int ilevel,u_offset_t iblks,enum cki_action action)437 iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
438 enum cki_action action)
439 {
440 struct bufarea *bp;
441 int i, n;
442 int (*func)(struct inodesc *) = NULL;
443 u_offset_t fsbperindirb;
444 daddr32_t last_lbn;
445 int nif;
446 char buf[BUFSIZ];
447
448 n = KEEPON;
449
450 switch (idesc->id_type) {
451 case ADDR:
452 func = idesc->id_func;
453 if (((n = (*func)(idesc)) & KEEPON) == 0)
454 return (n);
455 break;
456 case ACL:
457 func = idesc->id_func;
458 break;
459 case DATA:
460 func = dirscan;
461 break;
462 default:
463 errexit("unknown inodesc type %d in iblock()", idesc->id_type);
464 /* NOTREACHED */
465 }
466 if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
467 return ((idesc->id_type == ACL) ? STOP : SKIP);
468 }
469
470 bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
471 if (bp->b_errs != 0) {
472 brelse(bp);
473 return (SKIP);
474 }
475
476 ilevel--;
477 /*
478 * Trivia note: the BSD fsck has the number of bytes remaining
479 * as the third argument to iblock(), so the equivalent of
480 * fsbperindirb starts at fs_bsize instead of one. We're
481 * working in units of filesystem blocks here, not bytes or
482 * fragments.
483 */
484 for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
485 fsbperindirb *= (u_offset_t)NINDIR(&sblock);
486 }
487 /*
488 * nif indicates the next "free" pointer (as an array index) in this
489 * indirect block, based on counting the blocks remaining in the
490 * file after subtracting all previously processed blocks.
491 * This figure is based on the size field of the inode.
492 *
493 * Note that in normal operation, nif may initially be calculated
494 * as larger than the number of pointers in this block (as when
495 * there are more indirect blocks following); if that is
496 * the case, nif is limited to the max number of pointers per
497 * indirect block.
498 *
499 * Also note that if an inode is inconsistent (has more blocks
500 * allocated to it than the size field would indicate), the sweep
501 * through any indirect blocks directly pointed at by the inode
502 * continues. Since the block offset of any data blocks referenced
503 * by these indirect blocks is greater than the size of the file,
504 * the index nif may be computed as a negative value.
505 * In this case, we reset nif to indicate that all pointers in
506 * this retrieval block should be zeroed and the resulting
507 * unreferenced data and/or retrieval blocks will be recovered
508 * through garbage collection later.
509 */
510 nif = (offset_t)howmany(iblks, fsbperindirb);
511 if (nif > NINDIR(&sblock))
512 nif = NINDIR(&sblock);
513 else if (nif < 0)
514 nif = 0;
515 /*
516 * first pass: all "free" retrieval pointers (from [nif] thru
517 * the end of the indirect block) should be zero. (This
518 * assertion does not hold for directories, which may be
519 * truncated without releasing their allocated space)
520 */
521 if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
522 idesc->id_func == pass3bcheck)) {
523 for (i = nif; i < NINDIR(&sblock); i++) {
524 if (bp->b_un.b_indir[i] == 0)
525 continue;
526 (void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
527 (ulong_t)idesc->id_number);
528 if (preen) {
529 pfatal(buf);
530 } else if (dofix(idesc, buf)) {
531 freeblk(idesc->id_number,
532 bp->b_un.b_indir[i],
533 sblock.fs_frag);
534 bp->b_un.b_indir[i] = 0;
535 dirty(bp);
536 }
537 }
538 flush(fswritefd, bp);
539 }
540 /*
541 * second pass: all retrieval pointers referring to blocks within
542 * a valid range [0..filesize] (both indirect and data blocks)
543 * are examined in the same manner as ckinode() checks the
544 * direct blocks in the inode. Sweep through from
545 * the first pointer in this retrieval block to [nif-1].
546 */
547 last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
548 for (i = 0; i < nif; i++) {
549 if (ilevel == 0)
550 idesc->id_lbn++;
551 if (bp->b_un.b_indir[i] != 0) {
552 idesc->id_blkno = bp->b_un.b_indir[i];
553 if (ilevel > 0) {
554 n = iblock(idesc, ilevel, iblks, action);
555 /*
556 * Each iteration decreases "remaining block
557 * count" by the number of blocks accessible
558 * by a pointer at this indirect block level.
559 */
560 iblks -= fsbperindirb;
561 } else {
562 /*
563 * If we're truncating, func will discard
564 * the data block for us.
565 */
566 n = (*func)(idesc);
567 }
568
569 if ((action == CKI_TRUNCATE) &&
570 (idesc->id_truncto >= 0) &&
571 (idesc->id_lbn >= idesc->id_truncto)) {
572 freeblk(idesc->id_number, bp->b_un.b_indir[i],
573 sblock.fs_frag);
574 }
575
576 /*
577 * Note that truncation never gets STOP back
578 * under normal circumstances. Abnormal would
579 * be a bad acl short-circuit in iblock() or
580 * an out-of-range failure in pass4check().
581 * We still want to keep going when truncating
582 * under those circumstances, since the whole
583 * point of truncating is to get rid of all
584 * that.
585 */
586 if ((n & STOP) && (action != CKI_TRUNCATE)) {
587 brelse(bp);
588 return (n);
589 }
590 } else {
591 if ((idesc->id_lbn < last_lbn) &&
592 (idesc->id_firsthole < 0)) {
593 idesc->id_firsthole = idesc->id_lbn;
594 }
595 if (idesc->id_type == DATA) {
596 /*
597 * No point in continuing in the indirect
598 * blocks of a directory, since they'll just
599 * get freed anyway.
600 */
601 brelse(bp);
602 return ((n & ~KEEPON) | STOP);
603 }
604 }
605 }
606
607 brelse(bp);
608 return (KEEPON);
609 }
610
611 /*
612 * Check that a block is a legal block number.
613 * Return 0 if in range, 1 if out of range.
614 */
615 int
chkrange(daddr32_t blk,int cnt)616 chkrange(daddr32_t blk, int cnt)
617 {
618 int c;
619
620 if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
621 ((cnt - 1) > (maxfsblock - blk))) {
622 if (debug)
623 (void) printf(
624 "Bad fragment range: should be 1 <= %d..%d < %d\n",
625 blk, blk + cnt, maxfsblock);
626 return (1);
627 }
628 if ((cnt > sblock.fs_frag) ||
629 ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
630 if (debug)
631 (void) printf("Bad fragment size: size %d\n", cnt);
632 return (1);
633 }
634 c = dtog(&sblock, blk);
635 if (blk < cgdmin(&sblock, c)) {
636 if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
637 if (debug)
638 (void) printf(
639 "Bad fragment position: %d..%d spans start of cg metadata\n",
640 blk, blk + cnt);
641 return (1);
642 }
643 } else {
644 if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
645 if (debug)
646 (void) printf(
647 "Bad frag pos: %d..%d crosses end of cg\n",
648 blk, blk + cnt);
649 return (1);
650 }
651 }
652 return (0);
653 }
654
655 /*
656 * General purpose interface for reading inodes.
657 */
658
659 /*
660 * Note that any call to ginode() can potentially invalidate any
661 * dinode pointers previously acquired from it. To avoid pain,
662 * make sure to always call inodirty() immediately after modifying
663 * an inode, if there's any chance of ginode() being called after
664 * that. Also, always call ginode() right before you need to access
665 * an inode, so that there won't be any surprises from functions
666 * called between the previous ginode() invocation and the dinode
667 * use.
668 *
669 * Despite all that, we aren't doing the amount of i/o that's implied,
670 * as we use the buffer cache that getdatablk() and friends maintain.
671 */
672 static fsck_ino_t startinum = -1;
673
674 struct dinode *
ginode(fsck_ino_t inum)675 ginode(fsck_ino_t inum)
676 {
677 daddr32_t iblk;
678 struct dinode *dp;
679
680 if (inum < UFSROOTINO || inum > maxino) {
681 errexit("bad inode number %d to ginode\n", inum);
682 }
683 if (startinum == -1 ||
684 pbp == NULL ||
685 inum < startinum ||
686 inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
687 iblk = itod(&sblock, inum);
688 if (pbp != NULL) {
689 brelse(pbp);
690 }
691 /*
692 * We don't check for errors here, because we can't
693 * tell our caller about it, and the zeros that will
694 * be in the buffer are just as good as anything we
695 * could fake.
696 */
697 pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
698 startinum =
699 (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
700 }
701 dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
702 if (dp->di_suid != UID_LONG)
703 dp->di_uid = dp->di_suid;
704 if (dp->di_sgid != GID_LONG)
705 dp->di_gid = dp->di_sgid;
706 return (dp);
707 }
708
709 /*
710 * Special purpose version of ginode used to optimize first pass
711 * over all the inodes in numerical order. It bypasses the buffer
712 * system used by ginode(), etc in favour of reading the bulk of a
713 * cg's inodes at one time.
714 */
715 static fsck_ino_t nextino, lastinum;
716 static int64_t readcnt, readpercg, fullcnt, inobufsize;
717 static int64_t partialcnt, partialsize;
718 static size_t lastsize;
719 static struct dinode *inodebuf;
720 static diskaddr_t currentdblk;
721 static struct dinode *currentinode;
722
723 struct dinode *
getnextinode(fsck_ino_t inum)724 getnextinode(fsck_ino_t inum)
725 {
726 size_t size;
727 diskaddr_t dblk;
728 static struct dinode *dp;
729
730 if (inum != nextino++ || inum > maxino)
731 errexit("bad inode number %d to nextinode\n", inum);
732
733 /*
734 * Will always go into the if() the first time we're called,
735 * so dp will always be valid.
736 */
737 if (inum >= lastinum) {
738 readcnt++;
739 dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
740 currentdblk = dblk;
741 if (readcnt % readpercg == 0) {
742 if (partialsize > SIZE_MAX)
743 errexit(
744 "Internal error: partialsize overflow");
745 size = (size_t)partialsize;
746 lastinum += partialcnt;
747 } else {
748 if (inobufsize > SIZE_MAX)
749 errexit("Internal error: inobufsize overflow");
750 size = (size_t)inobufsize;
751 lastinum += fullcnt;
752 }
753 /*
754 * If fsck_bread() returns an error, it will already have
755 * zeroed out the buffer, so we do not need to do so here.
756 */
757 (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
758 lastsize = size;
759 dp = inodebuf;
760 }
761 currentinode = dp;
762 return (dp++);
763 }
764
765 /*
766 * Reread the current getnext() buffer. This allows for changing inodes
767 * other than the current one via ginode()/inodirty()/inoflush().
768 *
769 * Just reuses all the interesting variables that getnextinode() set up
770 * last time it was called. This shouldn't get called often, so we don't
771 * try to figure out if the caller's actually touched an inode in the
772 * range we have cached. There could have been an arbitrary number of
773 * them, after all.
774 */
775 struct dinode *
getnextrefresh(void)776 getnextrefresh(void)
777 {
778 if (inodebuf == NULL) {
779 return (NULL);
780 }
781
782 inoflush();
783 (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
784 return (currentinode);
785 }
786
787 void
resetinodebuf(void)788 resetinodebuf(void)
789 {
790 startinum = 0;
791 nextino = 0;
792 lastinum = 0;
793 readcnt = 0;
794 inobufsize = blkroundup(&sblock, INOBUFSIZE);
795 fullcnt = inobufsize / sizeof (struct dinode);
796 readpercg = sblock.fs_ipg / fullcnt;
797 partialcnt = sblock.fs_ipg % fullcnt;
798 partialsize = partialcnt * sizeof (struct dinode);
799 if (partialcnt != 0) {
800 readpercg++;
801 } else {
802 partialcnt = fullcnt;
803 partialsize = inobufsize;
804 }
805 if (inodebuf == NULL &&
806 (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
807 errexit("Cannot allocate space for inode buffer\n");
808 while (nextino < UFSROOTINO)
809 (void) getnextinode(nextino);
810 }
811
812 void
freeinodebuf(void)813 freeinodebuf(void)
814 {
815 if (inodebuf != NULL) {
816 free((void *)inodebuf);
817 }
818 inodebuf = NULL;
819 }
820
821 /*
822 * Routines to maintain information about directory inodes.
823 * This is built during the first pass and used during the
824 * second and third passes.
825 *
826 * Enter inodes into the cache.
827 */
828 void
cacheino(struct dinode * dp,fsck_ino_t inum)829 cacheino(struct dinode *dp, fsck_ino_t inum)
830 {
831 struct inoinfo *inp;
832 struct inoinfo **inpp;
833 uint_t blks;
834
835 blks = NDADDR + NIADDR;
836 inp = (struct inoinfo *)
837 malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
838 if (inp == NULL)
839 errexit("Cannot increase directory list\n");
840 init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
841 inpp = &inphead[inum % numdirs];
842 inp->i_nextlist = *inpp;
843 *inpp = inp;
844 inp->i_number = inum;
845 if (inplast == listmax) {
846 listmax += 100;
847 inpsort = (struct inoinfo **)realloc((void *)inpsort,
848 (unsigned)listmax * sizeof (struct inoinfo *));
849 if (inpsort == NULL)
850 errexit("cannot increase directory list");
851 }
852 inpsort[inplast++] = inp;
853 }
854
855 /*
856 * Look up an inode cache structure.
857 */
858 struct inoinfo *
getinoinfo(fsck_ino_t inum)859 getinoinfo(fsck_ino_t inum)
860 {
861 struct inoinfo *inp;
862
863 inp = search_cache(inphead[inum % numdirs], inum);
864 return (inp);
865 }
866
867 /*
868 * Determine whether inode is in cache.
869 */
870 int
inocached(fsck_ino_t inum)871 inocached(fsck_ino_t inum)
872 {
873 return (search_cache(inphead[inum % numdirs], inum) != NULL);
874 }
875
876 /*
877 * Clean up all the inode cache structure.
878 */
879 void
inocleanup(void)880 inocleanup(void)
881 {
882 struct inoinfo **inpp;
883
884 if (inphead == NULL)
885 return;
886 for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
887 free((void *)(*inpp));
888 }
889 free((void *)inphead);
890 free((void *)inpsort);
891 inphead = inpsort = NULL;
892 }
893
894 /*
895 * Routines to maintain information about acl inodes.
896 * This is built during the first pass and used during the
897 * second and third passes.
898 *
899 * Enter acl inodes into the cache.
900 */
901 void
cacheacl(struct dinode * dp,fsck_ino_t inum)902 cacheacl(struct dinode *dp, fsck_ino_t inum)
903 {
904 struct inoinfo *aclp;
905 struct inoinfo **aclpp;
906 uint_t blks;
907
908 blks = NDADDR + NIADDR;
909 aclp = (struct inoinfo *)
910 malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
911 if (aclp == NULL)
912 return;
913 aclpp = &aclphead[inum % numacls];
914 aclp->i_nextlist = *aclpp;
915 *aclpp = aclp;
916 aclp->i_number = inum;
917 aclp->i_isize = (offset_t)dp->di_size;
918 aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
919 (void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
920 if (aclplast == aclmax) {
921 aclmax += 100;
922 aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
923 (unsigned)aclmax * sizeof (struct inoinfo *));
924 if (aclpsort == NULL)
925 errexit("cannot increase acl list");
926 }
927 aclpsort[aclplast++] = aclp;
928 }
929
930
931 /*
932 * Generic cache search function.
933 * ROOT is the first entry in a hash chain (the caller is expected
934 * to have done the initial bucket lookup). KEY is what's being
935 * searched for.
936 *
937 * Returns a pointer to the entry if it is found, NULL otherwise.
938 */
939 static struct inoinfo *
search_cache(struct inoinfo * element,fsck_ino_t key)940 search_cache(struct inoinfo *element, fsck_ino_t key)
941 {
942 while (element != NULL) {
943 if (element->i_number == key)
944 break;
945 element = element->i_nextlist;
946 }
947
948 return (element);
949 }
950
/*
 * Mark the buffer behind pbp -- the inode block most recently loaded
 * by ginode() -- as modified, by handing it to dirty().
 *
 * pbp is used without a NULL check, so ginode() has presumably to have
 * been called successfully beforehand -- TODO confirm against dirty().
 */
void
inodirty(void)
{
	dirty(pbp);
}
956
957 static void
inoflush(void)958 inoflush(void)
959 {
960 if (pbp != NULL)
961 flush(fswritefd, pbp);
962 }
963
964 /*
965 * Interactive wrapper for freeino(), for those times when we're
966 * not sure if we should throw something away.
967 */
968 void
clri(struct inodesc * idesc,char * type,int verbose,int corrupting)969 clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
970 {
971 int need_parent;
972 struct dinode *dp;
973
974 if (statemap[idesc->id_number] == USTATE)
975 return;
976
977 dp = ginode(idesc->id_number);
978 if (verbose == CLRI_VERBOSE) {
979 pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
980 pinode(idesc->id_number);
981 }
982 if (preen || (reply("CLEAR") == 1)) {
983 need_parent = (corrupting == CLRI_NOP_OK) ?
984 TI_NOPARENT : TI_PARENT;
985 freeino(idesc->id_number, need_parent);
986 if (preen)
987 (void) printf(" (CLEARED)\n");
988 remove_orphan_dir(idesc->id_number);
989 } else if (corrupting == CLRI_NOP_CORRUPT) {
990 iscorrupt = 1;
991 }
992 (void) printf("\n");
993 }
994
995 /*
996 * Find the directory entry for the inode noted in id_parent (which is
997 * not necessarily the parent of anything, we're just using a convenient
998 * field.
999 */
1000 int
findname(struct inodesc * idesc)1001 findname(struct inodesc *idesc)
1002 {
1003 struct direct *dirp = idesc->id_dirp;
1004
1005 if (dirp->d_ino != idesc->id_parent)
1006 return (KEEPON);
1007 (void) memmove(idesc->id_name, dirp->d_name,
1008 MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1009 return (STOP|FOUND);
1010 }
1011
1012 /*
1013 * Find the inode number associated with the given name.
1014 */
1015 int
findino(struct inodesc * idesc)1016 findino(struct inodesc *idesc)
1017 {
1018 struct direct *dirp = idesc->id_dirp;
1019
1020 if (dirp->d_ino == 0)
1021 return (KEEPON);
1022 if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1023 dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1024 idesc->id_parent = dirp->d_ino;
1025 return (STOP|FOUND);
1026 }
1027 return (KEEPON);
1028 }
1029
1030 int
cleardirentry(fsck_ino_t parentdir,fsck_ino_t target)1031 cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1032 {
1033 struct inodesc idesc;
1034 struct dinode *dp;
1035
1036 dp = ginode(parentdir);
1037 init_inodesc(&idesc);
1038 idesc.id_func = clearanentry;
1039 idesc.id_parent = target;
1040 idesc.id_type = DATA;
1041 idesc.id_fix = NOFIX;
1042 return (ckinode(dp, &idesc, CKI_TRAVERSE));
1043 }
1044
1045 static int
clearanentry(struct inodesc * idesc)1046 clearanentry(struct inodesc *idesc)
1047 {
1048 struct direct *dirp = idesc->id_dirp;
1049
1050 if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1051 idesc->id_entryno++;
1052 return (KEEPON);
1053 }
1054 dirp->d_ino = 0;
1055 return (STOP|FOUND|ALTERED);
1056 }
1057
1058 void
pinode(fsck_ino_t ino)1059 pinode(fsck_ino_t ino)
1060 {
1061 struct dinode *dp;
1062
1063 (void) printf(" I=%lu ", (ulong_t)ino);
1064 if (ino < UFSROOTINO || ino > maxino)
1065 return;
1066 dp = ginode(ino);
1067 pdinode(dp);
1068 }
1069
1070 static void
pdinode(struct dinode * dp)1071 pdinode(struct dinode *dp)
1072 {
1073 char *p;
1074 struct passwd *pw;
1075 time_t t;
1076
1077 (void) printf(" OWNER=");
1078 if ((pw = getpwuid((int)dp->di_uid)) != 0)
1079 (void) printf("%s ", pw->pw_name);
1080 else
1081 (void) printf("%lu ", (ulong_t)dp->di_uid);
1082 (void) printf("MODE=%o\n", dp->di_mode);
1083 if (preen)
1084 (void) printf("%s: ", devname);
1085 (void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1086
1087 /* ctime() ignores LOCALE, so this is safe */
1088 t = (time_t)dp->di_mtime;
1089 p = ctime(&t);
1090 (void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1091 }
1092
1093 void
blkerror(fsck_ino_t ino,char * type,daddr32_t blk,daddr32_t lbn)1094 blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1095 {
1096 pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1097 (void) printf("\n");
1098
1099 switch (statemap[ino] & ~INDELAYD) {
1100
1101 case FSTATE:
1102 case FZLINK:
1103 statemap[ino] = FCLEAR;
1104 return;
1105
1106 case DFOUND:
1107 case DSTATE:
1108 case DZLINK:
1109 statemap[ino] = DCLEAR;
1110 add_orphan_dir(ino);
1111 return;
1112
1113 case SSTATE:
1114 statemap[ino] = SCLEAR;
1115 return;
1116
1117 case FCLEAR:
1118 case DCLEAR:
1119 case SCLEAR:
1120 return;
1121
1122 default:
1123 errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1124 /* NOTREACHED */
1125 }
1126 }
1127
1128 /*
1129 * allocate an unused inode
1130 */
1131 fsck_ino_t
allocino(fsck_ino_t request,int type)1132 allocino(fsck_ino_t request, int type)
1133 {
1134 fsck_ino_t ino;
1135 struct dinode *dp;
1136 struct cg *cgp = &cgrp;
1137 int cg;
1138 time_t t;
1139 caddr_t err;
1140
1141 if (debug && (request != 0) && (request != UFSROOTINO))
1142 errexit("assertion failed: allocino() asked for "
1143 "inode %d instead of 0 or %d",
1144 (int)request, (int)UFSROOTINO);
1145
1146 /*
1147 * We know that we're only going to get requests for UFSROOTINO
1148 * or 0. If UFSROOTINO is wanted, then it better be available
1149 * because our caller is trying to recreate the root directory.
1150 * If we're asked for 0, then which one we return doesn't matter.
1151 * We know that inodes 0 and 1 are never valid to return, so we
1152 * the start at the lowest-legal inode number.
1153 *
1154 * If we got a request for UFSROOTINO, then request != 0, and
1155 * this pair of conditionals is the only place that treats
1156 * UFSROOTINO specially.
1157 */
1158 if (request == 0)
1159 request = UFSROOTINO;
1160 else if (statemap[request] != USTATE)
1161 return (0);
1162
1163 /*
1164 * Doesn't do wrapping, since we know we started at
1165 * the smallest inode.
1166 */
1167 for (ino = request; ino < maxino; ino++)
1168 if (statemap[ino] == USTATE)
1169 break;
1170 if (ino == maxino)
1171 return (0);
1172
1173 /*
1174 * In pass5, we'll calculate the bitmaps and counts all again from
1175 * scratch and do a comparison, but for that to work the cg has
1176 * to know what in-memory changes we've made to it. If we have
1177 * trouble reading the cg, cg_sanity() should kick it out so
1178 * we can skip explicit i/o error checking here.
1179 */
1180 cg = itog(&sblock, ino);
1181 (void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1182 err = cg_sanity(cgp, cg);
1183 if (err != NULL) {
1184 pfatal("CG %d: %s\n", cg, err);
1185 free((void *)err);
1186 if (reply("REPAIR") == 0)
1187 errexit("Program terminated.");
1188 fix_cg(cgp, cg);
1189 }
1190 setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1191 cgp->cg_cs.cs_nifree--;
1192 cgdirty();
1193
1194 if (lastino < ino)
1195 lastino = ino;
1196
1197 /*
1198 * Don't currently support IFATTRDIR or any of the other
1199 * types, as they aren't needed.
1200 */
1201 switch (type & IFMT) {
1202 case IFDIR:
1203 statemap[ino] = DSTATE;
1204 cgp->cg_cs.cs_ndir++;
1205 break;
1206 case IFREG:
1207 case IFLNK:
1208 statemap[ino] = FSTATE;
1209 break;
1210 default:
1211 /*
1212 * Pretend nothing ever happened. This clears the
1213 * dirty flag, among other things.
1214 */
1215 initbarea(&cgblk);
1216 if (debug)
1217 (void) printf("allocino: unknown type 0%o\n",
1218 type & IFMT);
1219 return (0);
1220 }
1221
1222 /*
1223 * We're allocating what should be a completely-unused inode,
1224 * so make sure we don't inherit anything from any previous
1225 * incarnations.
1226 */
1227 dp = ginode(ino);
1228 (void) memset((void *)dp, 0, sizeof (struct dinode));
1229 dp->di_db[0] = allocblk(1);
1230 if (dp->di_db[0] == 0) {
1231 statemap[ino] = USTATE;
1232 return (0);
1233 }
1234 dp->di_mode = (mode_t)type;
1235 (void) time(&t);
1236 dp->di_atime = (time32_t)t;
1237 dp->di_ctime = dp->di_atime;
1238 dp->di_mtime = dp->di_ctime;
1239 dp->di_size = (u_offset_t)sblock.fs_fsize;
1240 dp->di_blocks = btodb(sblock.fs_fsize);
1241 n_files++;
1242 inodirty();
1243 return (ino);
1244 }
1245
1246 /*
1247 * Release some or all of the blocks of an inode.
1248 * Only truncates down. Assumes new_length is appropriately aligned
1249 * to a block boundary (or a directory block boundary, if it's a
1250 * directory).
1251 *
1252 * If this is a directory, discard all of its contents first, so
1253 * we don't create a bunch of orphans that would need another fsck
1254 * run to clean up.
1255 *
1256 * Even if truncating to zero length, the inode remains allocated.
1257 */
1258 void
truncino(fsck_ino_t ino,offset_t new_length,int update)1259 truncino(fsck_ino_t ino, offset_t new_length, int update)
1260 {
1261 struct inodesc idesc;
1262 struct inoinfo *iip;
1263 struct dinode *dp;
1264 fsck_ino_t parent;
1265 mode_t mode;
1266 caddr_t message;
1267 int isdir, islink;
1268 int ilevel, dblk;
1269
1270 dp = ginode(ino);
1271 mode = (dp->di_mode & IFMT);
1272 isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1273 islink = (mode == IFLNK);
1274
1275 if (isdir) {
1276 /*
1277 * Go with the parent we found by chasing references,
1278 * if we've gotten that far. Otherwise, use what the
1279 * directory itself claims. If there's no ``..'' entry
1280 * in it, give up trying to get the link counts right.
1281 */
1282 if (update == TI_NOPARENT) {
1283 parent = -1;
1284 } else {
1285 iip = getinoinfo(ino);
1286 if (iip != NULL) {
1287 parent = iip->i_parent;
1288 } else {
1289 parent = lookup_dotdot_ino(ino);
1290 if (parent != 0) {
1291 /*
1292 * Make sure that the claimed
1293 * parent actually has a
1294 * reference to us.
1295 */
1296 dp = ginode(parent);
1297 idesc.id_name = lfname;
1298 idesc.id_type = DATA;
1299 idesc.id_func = findino;
1300 idesc.id_number = ino;
1301 idesc.id_fix = DONTKNOW;
1302 if ((ckinode(dp, &idesc,
1303 CKI_TRAVERSE) & FOUND) == 0)
1304 parent = 0;
1305 }
1306 }
1307 }
1308
1309 mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1310 if (parent > 0) {
1311 dp = ginode(parent);
1312 LINK_RANGE(message, dp->di_nlink, -1);
1313 if (message != NULL) {
1314 LINK_CLEAR(message, parent, dp->di_mode,
1315 &idesc);
1316 if (statemap[parent] == USTATE)
1317 goto no_parent_update;
1318 }
1319 TRACK_LNCNTP(parent, lncntp[parent]--);
1320 } else if ((mode == IFDIR) && (parent == 0)) {
1321 /*
1322 * Currently don't have a good way to
1323 * handle this, so throw up our hands.
1324 * However, we know that we can still
1325 * do some good if we continue, so
1326 * don't actually exit yet.
1327 *
1328 * We don't do it for attrdirs,
1329 * because there aren't link counts
1330 * between them and their parents.
1331 */
1332 pwarn("Could not determine former parent of "
1333 "inode %d, link counts are possibly\n"
1334 "incorrect. Please rerun fsck(8) to "
1335 "correct this.\n",
1336 ino);
1337 iscorrupt = 1;
1338 }
1339 /*
1340 * ...else if it's a directory with parent == -1, then
1341 * we've not gotten far enough to know connectivity,
1342 * and it'll get handled automatically later.
1343 */
1344 }
1345
1346 no_parent_update:
1347 init_inodesc(&idesc);
1348 idesc.id_type = ADDR;
1349 idesc.id_func = pass4check;
1350 idesc.id_number = ino;
1351 idesc.id_fix = DONTKNOW;
1352 idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1353 dp = ginode(ino);
1354 if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1355 inodirty();
1356
1357 /*
1358 * This has to be done after ckinode(), so that all of
1359 * the fragments get visited. Note that we assume we're
1360 * always truncating to a block boundary, rather than a
1361 * fragment boundary.
1362 */
1363 dp = ginode(ino);
1364 dp->di_size = new_length;
1365
1366 /*
1367 * Clear now-obsolete pointers.
1368 */
1369 for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1370 dp->di_db[dblk] = 0;
1371 }
1372
1373 ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1374 for (ilevel++; ilevel < NIADDR; ilevel++) {
1375 dp->di_ib[ilevel] = 0;
1376 }
1377
1378 inodirty();
1379 }
1380
1381 /*
1382 * Release an inode's resources, then release the inode itself.
1383 */
1384 void
freeino(fsck_ino_t ino,int update_parent)1385 freeino(fsck_ino_t ino, int update_parent)
1386 {
1387 int cg;
1388 struct dinode *dp;
1389 struct cg *cgp;
1390
1391 n_files--;
1392 dp = ginode(ino);
1393 /*
1394 * We need to make sure that the file is really a large file.
1395 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1396 * negative size, which shall be cleared. (see verify_inode() in
1397 * pass1.c)
1398 */
1399 if (dp->di_size > (u_offset_t)MAXOFF_T &&
1400 dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1401 ftypeok(dp) &&
1402 (dp->di_mode & IFMT) != IFBLK &&
1403 (dp->di_mode & IFMT) != IFCHR) {
1404 largefile_count--;
1405 }
1406 truncino(ino, 0, update_parent);
1407
1408 dp = ginode(ino);
1409 if ((dp->di_mode & IFMT) == IFATTRDIR) {
1410 clearshadow(ino, &attrclientinfo);
1411 dp = ginode(ino);
1412 }
1413
1414 clearinode(dp);
1415 inodirty();
1416 statemap[ino] = USTATE;
1417
1418 /*
1419 * Keep the disk in sync with us so that pass5 doesn't get
1420 * upset about spurious inconsistencies.
1421 */
1422 cg = itog(&sblock, ino);
1423 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1424 (size_t)sblock.fs_cgsize);
1425 cgp = cgblk.b_un.b_cg;
1426 clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1427 cgp->cg_cs.cs_nifree += 1;
1428 cgdirty();
1429 sblock.fs_cstotal.cs_nifree += 1;
1430 sbdirty();
1431 }
1432
1433 void
init_inoinfo(struct inoinfo * inp,struct dinode * dp,fsck_ino_t inum)1434 init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1435 {
1436 inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1437 inp->i_dotdot = (fsck_ino_t)0;
1438 inp->i_isize = (offset_t)dp->di_size;
1439 inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1440 inp->i_extattr = dp->di_oeftflag;
1441 (void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1442 inp->i_blkssize);
1443 }
1444
1445 /*
1446 * Return the inode number in the ".." entry of the provided
1447 * directory inode.
1448 */
1449 static int
lookup_dotdot_ino(fsck_ino_t ino)1450 lookup_dotdot_ino(fsck_ino_t ino)
1451 {
1452 struct inodesc idesc;
1453
1454 init_inodesc(&idesc);
1455 idesc.id_type = DATA;
1456 idesc.id_func = findino;
1457 idesc.id_name = "..";
1458 idesc.id_number = ino;
1459 idesc.id_fix = NOFIX;
1460
1461 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1462 return (idesc.id_parent);
1463 }
1464
1465 return (0);
1466 }
1467
1468 /*
1469 * Convenience wrapper around ckinode(findino()).
1470 */
1471 int
lookup_named_ino(fsck_ino_t dir,caddr_t name)1472 lookup_named_ino(fsck_ino_t dir, caddr_t name)
1473 {
1474 struct inodesc idesc;
1475
1476 init_inodesc(&idesc);
1477 idesc.id_type = DATA;
1478 idesc.id_func = findino;
1479 idesc.id_name = name;
1480 idesc.id_number = dir;
1481 idesc.id_fix = NOFIX;
1482
1483 if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1484 return (idesc.id_parent);
1485 }
1486
1487 return (0);
1488 }
1489
1490 /*
1491 * Marks inodes that are being orphaned and might need to be reconnected
1492 * by pass4(). The inode we're traversing is the directory whose
1493 * contents will be reconnected later. id_parent is the lfn at which
1494 * to start looking at said contents.
1495 */
1496 static int
mark_a_delayed_inode(struct inodesc * idesc)1497 mark_a_delayed_inode(struct inodesc *idesc)
1498 {
1499 struct direct *dirp = idesc->id_dirp;
1500
1501 if (idesc->id_lbn < idesc->id_parent) {
1502 return (KEEPON);
1503 }
1504
1505 if (dirp->d_ino != 0 &&
1506 strcmp(dirp->d_name, ".") != 0 &&
1507 strcmp(dirp->d_name, "..") != 0) {
1508 statemap[dirp->d_ino] &= ~INFOUND;
1509 statemap[dirp->d_ino] |= INDELAYD;
1510 }
1511
1512 return (KEEPON);
1513 }
1514
1515 static void
mark_delayed_inodes(fsck_ino_t ino,daddr32_t first_lfn)1516 mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1517 {
1518 struct dinode *dp;
1519 struct inodesc idelayed;
1520
1521 init_inodesc(&idelayed);
1522 idelayed.id_number = ino;
1523 idelayed.id_type = DATA;
1524 idelayed.id_fix = NOFIX;
1525 idelayed.id_func = mark_a_delayed_inode;
1526 idelayed.id_parent = first_lfn;
1527 idelayed.id_entryno = 2;
1528
1529 dp = ginode(ino);
1530 (void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1531 }
1532
1533 /*
1534 * Clear the i_oeftflag/extended attribute pointer from INO.
1535 */
1536 void
clearattrref(fsck_ino_t ino)1537 clearattrref(fsck_ino_t ino)
1538 {
1539 struct dinode *dp;
1540
1541 dp = ginode(ino);
1542 if (debug) {
1543 if (dp->di_oeftflag == 0)
1544 (void) printf("clearattref: no attr to clear on %d\n",
1545 ino);
1546 }
1547
1548 dp->di_oeftflag = 0;
1549 inodirty();
1550 }
1551