1 /*
2 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
3 */
4
5 /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
6 /* All Rights Reserved */
7
8 /*
9 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms are permitted
13 * provided that: (1) source distributions retain this entire copyright
14 * notice and comment, and (2) distributions including binaries display
15 * the following acknowledgement: ``This product includes software
16 * developed by the University of California, Berkeley and its contributors''
17 * in the documentation or other materials provided with the distribution
18 * and in all advertising materials mentioning features or use of this
19 * software. Neither the name of the University nor the names of its
20 * contributors may be used to endorse or promote products derived
21 * from this software without specific prior written permission.
22 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
23 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
24 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
25 */
26
27
28 #include <stdio.h>
29 #include <string.h>
30 #include <stdlib.h>
31 #include <unistd.h>
32 #include <time.h>
33 #include <limits.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36 #include <sys/sysmacros.h>
37 #include <sys/mntent.h>
38 #include <sys/vnode.h>
39 #include <sys/fs/ufs_inode.h>
40 #include <sys/fs/ufs_fs.h>
41 #define _KERNEL
42 #include <sys/fs/ufs_fsdir.h>
43 #undef _KERNEL
44 #include <pwd.h>
45 #include "fsck.h"
46
47 static int get_indir_offsets(int, daddr_t, int *, int *);
48 static int clearanentry(struct inodesc *);
49 static void pdinode(struct dinode *);
50 static void inoflush(void);
51 static void mark_delayed_inodes(fsck_ino_t, daddr32_t);
52 static int iblock(struct inodesc *, int, u_offset_t, enum cki_action);
53 static struct inoinfo *search_cache(struct inoinfo *, fsck_ino_t);
54 static int ckinode_common(struct dinode *, struct inodesc *, enum cki_action);
55 static int lookup_dotdot_ino(fsck_ino_t);
56
57 /*
58 * ckinode() essentially traverses the blocklist of the provided
59 * inode. For each block either the caller-supplied callback (id_func
60 * in the provided struct inodesc) or dirscan() is invoked. Which is
61 * chosen is controlled by what type of traversal was requested
62 * (id_type) - if it was for an ADDR or ACL, use the callback,
63 * otherwise it is assumed to be DATA (i.e., a directory) whose
64 * contents need to be scanned.
65 *
66 * Note that a directory inode can get passed in with a type of ADDR;
67 * the type field is orthogonal to the IFMT value. This is so that
68 * the file aspects (no duplicate blocks, etc) of a directory can be
69 * verified just like is done for any other file, or the actual
70 * contents can be scanned so that connectivity and such can be
71 * investigated.
72 *
73 * The traversal is controlled by flags in the return value of
74 * dirscan() or the callback. Five flags are defined, STOP, SKIP,
75 * KEEPON, ALTERED, and FOUND. Their semantics are:
76 *
77 * STOP - no further processing of this inode is desired/possible/
78 * feasible/etc. This can mean that whatever the scan
79 * was searching for was found, or a serious
80 * inconsistency was encountered, or anything else
81 * appropriate.
82 *
83 * SKIP - something that made it impossible to continue was
84 * encountered, and the caller should go on to the next
85 * inode. This is more for i/o failures than for
86 * logical inconsistencies. Nothing actually looks for
87 * this.
88 *
89 * KEEPON - no more blocks of this inode need to be scanned, but
90 * nothing's wrong, so keep on going with the next
91 * inode. It is similar to STOP, except that
92 * ckinode()'s caller will typically advance to the next
93 * inode for KEEPON, whereas it ceases scanning through
94 * the inodes completely for STOP.
95 *
96 * ALTERED - a change was made to the inode. If the caller sees
97 * this set, it should make sure to flush out the
98 * changes. Note that any data blocks read in by the
99 * function need to be marked dirty by it directly;
100 * flushing of those will happen automatically later.
101 *
102 * FOUND - whatever was being searched for was located.
103 * Typically combined with STOP to avoid wasting time
104 * doing additional looking.
105 *
106 * During a traversal, some state needs to be carried around. At the
107 * least, the callback functions need to know what inode they're
108 * working on, which logical block, and whether or not fixing problems
109 * when they're encountered is desired. Rather than try to guess what
110 * else might be needed (and thus end up passing way more arguments
111 * than is reasonable), all the possibilities have been bundled in
112 * struct inodesc. About half of the fields are specific to directory
113 * traversals, and the rest are pretty much generic to any traversal.
114 *
115 * The general fields are:
116 *
117 * id_fix What to do when an error is found. Generally, this
118 * is set to DONTKNOW before a traversal. If a
119 * problem is encountered, it is changed to either FIX
120 * or NOFIX by the dofix() query function. If id_fix
121 * has already been set to FIX when dofix() is called, then
122 * it includes the ALTERED flag (see above) in its return
123 * value; the net effect is that the inode's buffer
124 * will get marked dirty and written to disk at some
125 * point. If id_fix is DONTKNOW, then dofix() will
126 * query the user. If it is NOFIX, then dofix()
127 * essentially does nothing. A few routines set NOFIX
128 * as the initial value, as they are performing a best-
129 * effort informational task, rather than an actual
130 * repair operation.
131 *
132 * id_func This is the function that will be called for every
133 * logical block in the file (assuming id_type is not
134 * DATA). The logical block may represent a hole, so
135 * the callback needs to be prepared to handle that
136 * case. Its return value is a combination of the flags
137 * described above (SKIP, ALTERED, etc).
138 *
139 * id_number The inode number whose block list or data is being
140 * scanned.
141 *
142 * id_parent When id_type is DATA, this is the inode number for
143 * the parent of id_number. Otherwise, it is
144 * available for use as an extra parameter or return
145 * value between the callback and ckinode()'s caller.
146 * Which, if either, of those is left completely up to
147 * the two routines involved, so nothing can generally
148 * be assumed about the id_parent value for non-DATA
149 * traversals.
150 *
151 * id_lbn This is the current logical block (not fragment)
152 * number being visited by the traversal.
153 *
154 * id_blkno This is the physical block corresponding to id_lbn.
155 *
156 * id_numfrags This defines how large a block is being processed in
157 * this particular invocation of the callback.
158 * Usually, it will be the same as sblock.fs_frag.
159 * However, if a direct block is being processed and
160 * it is less than a full filesystem block,
161 * id_numfrags will indicate just how many fragments
162 * (starting from id_lbn) are actually part of the
163 * file.
164 *
165 * id_truncto The pass 4 callback is used in several places to
166 * free the blocks of a file (the `FILE HAS PROBLEM
167 * FOO; CLEAR?' scenario). This has been generalized
168 * to allow truncating a file to a particular length
169 * rather than always completely discarding it. If
170 * id_truncto is -1, then the entire file is released,
171 * otherwise it is logical block number to truncate
172 * to. This generalized interface was motivated by a
173 * desire to be able to discard everything after a
174 * hole in a directory, rather than the entire
175 * directory.
176 *
177 * id_type Selects the type of traversal. DATA for dirscan(),
178 * ADDR or ACL for using the provided callback.
179 *
180 * There are several more fields used just for dirscan() traversals:
181 *
182 * id_filesize The number of bytes in the overall directory left to
183 * process.
184 *
185 * id_loc Byte position within the directory block. Should always
186 * point to the start of a directory entry.
187 *
188 * id_entryno Which logical directory entry is being processed (0
189 * is `.', 1 is `..', 2 and on are normal entries).
190 * This field is primarily used to enable special
191 * checks when looking at the first two entries.
192 *
193 * The exception (there's always an exception in fsck)
194 * is that in pass 1, it tracks how many fragments are
195 * being used by a particular inode.
196 *
197 * id_firsthole The first logical block number that was found to
198 * be zero. As directories are not supposed to have
199 * holes, this marks where a directory should be
200 * truncated down to. A value of -1 indicates that
201 * no holes were found.
202 *
203 * id_dirp A pointer to the in-memory copy of the current
204 * directory entry (as identified by id_loc).
205 *
206 * id_name This is a directory entry name to either create
207 * (callback is mkentry) or locate (callback is
208 * chgino, findino, or findname).
209 */
210 int
ckinode(struct dinode * dp,struct inodesc * idesc,enum cki_action action)211 ckinode(struct dinode *dp, struct inodesc *idesc, enum cki_action action)
212 {
213 struct inodesc cleardesc;
214 mode_t mode;
215
216 if (idesc->id_filesize == 0)
217 idesc->id_filesize = (offset_t)dp->di_size;
218
219 /*
220 * Our caller should be filtering out completely-free inodes
221 * (mode == zero), so we'll work on the assumption that what
222 * we're given has some basic validity.
223 *
224 * The kernel is inconsistent about MAXPATHLEN including the
225 * trailing \0, so allow the more-generous length for symlinks.
226 */
227 mode = dp->di_mode & IFMT;
228 if (mode == IFBLK || mode == IFCHR)
229 return (KEEPON);
230 if (mode == IFLNK && dp->di_size > MAXPATHLEN) {
231 pwarn("I=%d Symlink longer than supported maximum\n",
232 idesc->id_number);
233 init_inodesc(&cleardesc);
234 cleardesc.id_type = ADDR;
235 cleardesc.id_number = idesc->id_number;
236 cleardesc.id_fix = DONTKNOW;
237 clri(&cleardesc, "BAD", CLRI_VERBOSE, CLRI_NOP_CORRUPT);
238 return (STOP);
239 }
240 return (ckinode_common(dp, idesc, action));
241 }
242
243 /*
244 * This was split out from ckinode() to allow it to be used
245 * without having to pass in kludge flags to suppress the
246 * wrong-for-deletion initialization and irrelevant checks.
247 * This feature is no longer needed, but is being kept in case
248 * the need comes back.
249 */
250 static int
ckinode_common(struct dinode * dp,struct inodesc * idesc,enum cki_action action)251 ckinode_common(struct dinode *dp, struct inodesc *idesc,
252 enum cki_action action)
253 {
254 offset_t offset;
255 struct dinode dino;
256 daddr_t ndb;
257 int indir_data_blks, last_indir_blk;
258 int ret, i, frags;
259
260 (void) memmove(&dino, dp, sizeof (struct dinode));
261 ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
262
263 for (i = 0; i < NDADDR; i++) {
264 idesc->id_lbn++;
265 offset = blkoff(&sblock, dino.di_size);
266 if ((--ndb == 0) && (offset != 0)) {
267 idesc->id_numfrags =
268 numfrags(&sblock, fragroundup(&sblock, offset));
269 } else {
270 idesc->id_numfrags = sblock.fs_frag;
271 }
272 if (dino.di_db[i] == 0) {
273 if ((ndb > 0) && (idesc->id_firsthole < 0)) {
274 idesc->id_firsthole = i;
275 }
276 continue;
277 }
278 idesc->id_blkno = dino.di_db[i];
279 if (idesc->id_type == ADDR || idesc->id_type == ACL)
280 ret = (*idesc->id_func)(idesc);
281 else
282 ret = dirscan(idesc);
283
284 /*
285 * Need to clear the entry, now that we're done with
286 * it. We depend on freeblk() ignoring a request to
287 * free already-free fragments to handle the problem of
288 * a partial block.
289 */
290 if ((action == CKI_TRUNCATE) &&
291 (idesc->id_truncto >= 0) &&
292 (idesc->id_lbn >= idesc->id_truncto)) {
293 dp = ginode(idesc->id_number);
294 /*
295 * The (int) cast is safe, in that if di_size won't
296 * fit, it'll be a multiple of any legal fs_frag,
297 * thus giving a zero result. That value, in turn
298 * means we're doing an entire block.
299 */
300 frags = howmany((int)dp->di_size, sblock.fs_fsize) %
301 sblock.fs_frag;
302 if (frags == 0)
303 frags = sblock.fs_frag;
304 freeblk(idesc->id_number, dp->di_db[i],
305 frags);
306 dp = ginode(idesc->id_number);
307 dp->di_db[i] = 0;
308 inodirty();
309 ret |= ALTERED;
310 }
311
312 if (ret & STOP)
313 return (ret);
314 }
315
316 #ifdef lint
317 /*
318 * Cure a lint complaint of ``possible use before set''.
319 * Apparently it can't quite figure out the switch statement.
320 */
321 indir_data_blks = 0;
322 #endif
323 /*
324 * indir_data_blks contains the number of data blocks in all
325 * the previous levels for this iteration. E.g., for the
326 * single indirect case (i = 0, di_ib[i] != 0), NDADDR's worth
327 * of blocks have already been covered by the direct blocks
328 * (di_db[]). At the triple indirect level (i = NIADDR - 1),
329 * it is all of the number of data blocks that were covered
330 * by the second indirect, single indirect, and direct block
331 * levels.
332 */
333 idesc->id_numfrags = sblock.fs_frag;
334 ndb = howmany(dino.di_size, (u_offset_t)sblock.fs_bsize);
335 for (i = 0; i < NIADDR; i++) {
336 (void) get_indir_offsets(i, ndb, &indir_data_blks,
337 &last_indir_blk);
338 if (dino.di_ib[i] != 0) {
339 /*
340 * We'll only clear di_ib[i] if the first entry (and
341 * therefore all of them) is to be cleared, since we
342 * only go through this code on the first entry of
343 * each level of indirection. The +1 is to account
344 * for the fact that we don't modify id_lbn until
345 * we actually start processing on a data block.
346 */
347 idesc->id_blkno = dino.di_ib[i];
348 ret = iblock(idesc, i + 1,
349 (u_offset_t)howmany(dino.di_size,
350 (u_offset_t)sblock.fs_bsize) - indir_data_blks,
351 action);
352 if ((action == CKI_TRUNCATE) &&
353 (idesc->id_truncto <= indir_data_blks) &&
354 ((idesc->id_lbn + 1) >= indir_data_blks) &&
355 ((idesc->id_lbn + 1) <= last_indir_blk)) {
356 dp = ginode(idesc->id_number);
357 if (dp->di_ib[i] != 0) {
358 freeblk(idesc->id_number, dp->di_ib[i],
359 sblock.fs_frag);
360 }
361 }
362 if (ret & STOP)
363 return (ret);
364 } else {
365 /*
366 * Need to know which of the file's logical blocks
367 * reside in the missing indirect block. However, the
368 * precise location is only needed for truncating
369 * directories, and level-of-indirection precision is
370 * sufficient for that.
371 */
372 if ((indir_data_blks < ndb) &&
373 (idesc->id_firsthole < 0)) {
374 idesc->id_firsthole = indir_data_blks;
375 }
376 }
377 }
378 return (KEEPON);
379 }
380
381 static int
get_indir_offsets(int ilevel_wanted,daddr_t ndb,int * data_blks,int * last_blk)382 get_indir_offsets(int ilevel_wanted, daddr_t ndb, int *data_blks,
383 int *last_blk)
384 {
385 int ndb_ilevel = -1;
386 int ilevel;
387 int dblks, lblk;
388
389 for (ilevel = 0; ilevel < NIADDR; ilevel++) {
390 switch (ilevel) {
391 case 0: /* SINGLE */
392 dblks = NDADDR;
393 lblk = dblks + NINDIR(&sblock) - 1;
394 break;
395 case 1: /* DOUBLE */
396 dblks = NDADDR + NINDIR(&sblock);
397 lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock)) - 1;
398 break;
399 case 2: /* TRIPLE */
400 dblks = NDADDR + NINDIR(&sblock) +
401 (NINDIR(&sblock) * NINDIR(&sblock));
402 lblk = dblks + (NINDIR(&sblock) * NINDIR(&sblock) *
403 NINDIR(&sblock)) - 1;
404 break;
405 default:
406 exitstat = EXERRFATAL;
407 /*
408 * Translate from zero-based array to
409 * one-based human-style counting.
410 */
411 errexit("panic: indirection level %d not 1, 2, or 3",
412 ilevel + 1);
413 /* NOTREACHED */
414 }
415
416 if (dblks < ndb && ndb <= lblk)
417 ndb_ilevel = ilevel;
418
419 if (ilevel == ilevel_wanted) {
420 if (data_blks != NULL)
421 *data_blks = dblks;
422 if (last_blk != NULL)
423 *last_blk = lblk;
424 }
425 }
426
427 return (ndb_ilevel);
428 }
429
430 static int
iblock(struct inodesc * idesc,int ilevel,u_offset_t iblks,enum cki_action action)431 iblock(struct inodesc *idesc, int ilevel, u_offset_t iblks,
432 enum cki_action action)
433 {
434 struct bufarea *bp;
435 int i, n;
436 int (*func)(struct inodesc *) = NULL;
437 u_offset_t fsbperindirb;
438 daddr32_t last_lbn;
439 int nif;
440 char buf[BUFSIZ];
441
442 n = KEEPON;
443
444 switch (idesc->id_type) {
445 case ADDR:
446 func = idesc->id_func;
447 if (((n = (*func)(idesc)) & KEEPON) == 0)
448 return (n);
449 break;
450 case ACL:
451 func = idesc->id_func;
452 break;
453 case DATA:
454 func = dirscan;
455 break;
456 default:
457 errexit("unknown inodesc type %d in iblock()", idesc->id_type);
458 /* NOTREACHED */
459 }
460 if (chkrange(idesc->id_blkno, idesc->id_numfrags)) {
461 return ((idesc->id_type == ACL) ? STOP : SKIP);
462 }
463
464 bp = getdatablk(idesc->id_blkno, (size_t)sblock.fs_bsize);
465 if (bp->b_errs != 0) {
466 brelse(bp);
467 return (SKIP);
468 }
469
470 ilevel--;
471 /*
472 * Trivia note: the BSD fsck has the number of bytes remaining
473 * as the third argument to iblock(), so the equivalent of
474 * fsbperindirb starts at fs_bsize instead of one. We're
475 * working in units of filesystem blocks here, not bytes or
476 * fragments.
477 */
478 for (fsbperindirb = 1, i = 0; i < ilevel; i++) {
479 fsbperindirb *= (u_offset_t)NINDIR(&sblock);
480 }
481 /*
482 * nif indicates the next "free" pointer (as an array index) in this
483 * indirect block, based on counting the blocks remaining in the
484 * file after subtracting all previously processed blocks.
485 * This figure is based on the size field of the inode.
486 *
487 * Note that in normal operation, nif may initially be calculated
488 * as larger than the number of pointers in this block (as when
489 * there are more indirect blocks following); if that is
490 * the case, nif is limited to the max number of pointers per
491 * indirect block.
492 *
493 * Also note that if an inode is inconsistent (has more blocks
494 * allocated to it than the size field would indicate), the sweep
495 * through any indirect blocks directly pointed at by the inode
496 * continues. Since the block offset of any data blocks referenced
497 * by these indirect blocks is greater than the size of the file,
498 * the index nif may be computed as a negative value.
499 * In this case, we reset nif to indicate that all pointers in
500 * this retrieval block should be zeroed and the resulting
501 * unreferenced data and/or retrieval blocks will be recovered
502 * through garbage collection later.
503 */
504 nif = (offset_t)howmany(iblks, fsbperindirb);
505 if (nif > NINDIR(&sblock))
506 nif = NINDIR(&sblock);
507 else if (nif < 0)
508 nif = 0;
509 /*
510 * first pass: all "free" retrieval pointers (from [nif] thru
511 * the end of the indirect block) should be zero. (This
512 * assertion does not hold for directories, which may be
513 * truncated without releasing their allocated space)
514 */
515 if (nif < NINDIR(&sblock) && (idesc->id_func == pass1check ||
516 idesc->id_func == pass3bcheck)) {
517 for (i = nif; i < NINDIR(&sblock); i++) {
518 if (bp->b_un.b_indir[i] == 0)
519 continue;
520 (void) sprintf(buf, "PARTIALLY TRUNCATED INODE I=%lu",
521 (ulong_t)idesc->id_number);
522 if (preen) {
523 pfatal(buf);
524 } else if (dofix(idesc, buf)) {
525 freeblk(idesc->id_number,
526 bp->b_un.b_indir[i],
527 sblock.fs_frag);
528 bp->b_un.b_indir[i] = 0;
529 dirty(bp);
530 }
531 }
532 flush(fswritefd, bp);
533 }
534 /*
535 * second pass: all retrieval pointers referring to blocks within
536 * a valid range [0..filesize] (both indirect and data blocks)
537 * are examined in the same manner as ckinode() checks the
538 * direct blocks in the inode. Sweep through from
539 * the first pointer in this retrieval block to [nif-1].
540 */
541 last_lbn = howmany(idesc->id_filesize, sblock.fs_bsize);
542 for (i = 0; i < nif; i++) {
543 if (ilevel == 0)
544 idesc->id_lbn++;
545 if (bp->b_un.b_indir[i] != 0) {
546 idesc->id_blkno = bp->b_un.b_indir[i];
547 if (ilevel > 0) {
548 n = iblock(idesc, ilevel, iblks, action);
549 /*
550 * Each iteration decreases "remaining block
551 * count" by the number of blocks accessible
552 * by a pointer at this indirect block level.
553 */
554 iblks -= fsbperindirb;
555 } else {
556 /*
557 * If we're truncating, func will discard
558 * the data block for us.
559 */
560 n = (*func)(idesc);
561 }
562
563 if ((action == CKI_TRUNCATE) &&
564 (idesc->id_truncto >= 0) &&
565 (idesc->id_lbn >= idesc->id_truncto)) {
566 freeblk(idesc->id_number, bp->b_un.b_indir[i],
567 sblock.fs_frag);
568 }
569
570 /*
571 * Note that truncation never gets STOP back
572 * under normal circumstances. Abnormal would
573 * be a bad acl short-circuit in iblock() or
574 * an out-of-range failure in pass4check().
575 * We still want to keep going when truncating
576 * under those circumstances, since the whole
577 * point of truncating is to get rid of all
578 * that.
579 */
580 if ((n & STOP) && (action != CKI_TRUNCATE)) {
581 brelse(bp);
582 return (n);
583 }
584 } else {
585 if ((idesc->id_lbn < last_lbn) &&
586 (idesc->id_firsthole < 0)) {
587 idesc->id_firsthole = idesc->id_lbn;
588 }
589 if (idesc->id_type == DATA) {
590 /*
591 * No point in continuing in the indirect
592 * blocks of a directory, since they'll just
593 * get freed anyway.
594 */
595 brelse(bp);
596 return ((n & ~KEEPON) | STOP);
597 }
598 }
599 }
600
601 brelse(bp);
602 return (KEEPON);
603 }
604
605 /*
606 * Check that a block is a legal block number.
607 * Return 0 if in range, 1 if out of range.
608 */
609 int
chkrange(daddr32_t blk,int cnt)610 chkrange(daddr32_t blk, int cnt)
611 {
612 int c;
613
614 if (cnt <= 0 || blk <= 0 || ((unsigned)blk >= (unsigned)maxfsblock) ||
615 ((cnt - 1) > (maxfsblock - blk))) {
616 if (debug)
617 (void) printf(
618 "Bad fragment range: should be 1 <= %d..%d < %d\n",
619 blk, blk + cnt, maxfsblock);
620 return (1);
621 }
622 if ((cnt > sblock.fs_frag) ||
623 ((fragnum(&sblock, blk) + cnt) > sblock.fs_frag)) {
624 if (debug)
625 (void) printf("Bad fragment size: size %d\n", cnt);
626 return (1);
627 }
628 c = dtog(&sblock, blk);
629 if (blk < cgdmin(&sblock, c)) {
630 if ((unsigned)(blk + cnt) > (unsigned)cgsblock(&sblock, c)) {
631 if (debug)
632 (void) printf(
633 "Bad fragment position: %d..%d spans start of cg metadata\n",
634 blk, blk + cnt);
635 return (1);
636 }
637 } else {
638 if ((unsigned)(blk + cnt) > (unsigned)cgbase(&sblock, c+1)) {
639 if (debug)
640 (void) printf(
641 "Bad frag pos: %d..%d crosses end of cg\n",
642 blk, blk + cnt);
643 return (1);
644 }
645 }
646 return (0);
647 }
648
649 /*
650 * General purpose interface for reading inodes.
651 */
652
653 /*
654 * Note that any call to ginode() can potentially invalidate any
655 * dinode pointers previously acquired from it. To avoid pain,
656 * make sure to always call inodirty() immediately after modifying
657 * an inode, if there's any chance of ginode() being called after
658 * that. Also, always call ginode() right before you need to access
659 * an inode, so that there won't be any surprises from functions
660 * called between the previous ginode() invocation and the dinode
661 * use.
662 *
663 * Despite all that, we aren't doing the amount of i/o that's implied,
664 * as we use the buffer cache that getdatablk() and friends maintain.
665 */
666 static fsck_ino_t startinum = -1;
667
668 struct dinode *
ginode(fsck_ino_t inum)669 ginode(fsck_ino_t inum)
670 {
671 daddr32_t iblk;
672 struct dinode *dp;
673
674 if (inum < UFSROOTINO || inum > maxino) {
675 errexit("bad inode number %d to ginode\n", inum);
676 }
677 if (startinum == -1 ||
678 pbp == NULL ||
679 inum < startinum ||
680 inum >= (fsck_ino_t)(startinum + (fsck_ino_t)INOPB(&sblock))) {
681 iblk = itod(&sblock, inum);
682 if (pbp != NULL) {
683 brelse(pbp);
684 }
685 /*
686 * We don't check for errors here, because we can't
687 * tell our caller about it, and the zeros that will
688 * be in the buffer are just as good as anything we
689 * could fake.
690 */
691 pbp = getdatablk(iblk, (size_t)sblock.fs_bsize);
692 startinum =
693 (fsck_ino_t)((inum / INOPB(&sblock)) * INOPB(&sblock));
694 }
695 dp = &pbp->b_un.b_dinode[inum % INOPB(&sblock)];
696 if (dp->di_suid != UID_LONG)
697 dp->di_uid = dp->di_suid;
698 if (dp->di_sgid != GID_LONG)
699 dp->di_gid = dp->di_sgid;
700 return (dp);
701 }
702
703 /*
704 * Special purpose version of ginode used to optimize first pass
705 * over all the inodes in numerical order. It bypasses the buffer
706 * system used by ginode(), etc in favour of reading the bulk of a
707 * cg's inodes at one time.
708 */
709 static fsck_ino_t nextino, lastinum;
710 static int64_t readcnt, readpercg, fullcnt, inobufsize;
711 static int64_t partialcnt, partialsize;
712 static size_t lastsize;
713 static struct dinode *inodebuf;
714 static diskaddr_t currentdblk;
715 static struct dinode *currentinode;
716
717 struct dinode *
getnextinode(fsck_ino_t inum)718 getnextinode(fsck_ino_t inum)
719 {
720 size_t size;
721 diskaddr_t dblk;
722 static struct dinode *dp;
723
724 if (inum != nextino++ || inum > maxino)
725 errexit("bad inode number %d to nextinode\n", inum);
726
727 /*
728 * Will always go into the if() the first time we're called,
729 * so dp will always be valid.
730 */
731 if (inum >= lastinum) {
732 readcnt++;
733 dblk = fsbtodb(&sblock, itod(&sblock, lastinum));
734 currentdblk = dblk;
735 if (readcnt % readpercg == 0) {
736 if (partialsize > SIZE_MAX)
737 errexit(
738 "Internal error: partialsize overflow");
739 size = (size_t)partialsize;
740 lastinum += partialcnt;
741 } else {
742 if (inobufsize > SIZE_MAX)
743 errexit("Internal error: inobufsize overflow");
744 size = (size_t)inobufsize;
745 lastinum += fullcnt;
746 }
747 /*
748 * If fsck_bread() returns an error, it will already have
749 * zeroed out the buffer, so we do not need to do so here.
750 */
751 (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, dblk, size);
752 lastsize = size;
753 dp = inodebuf;
754 }
755 currentinode = dp;
756 return (dp++);
757 }
758
759 /*
760 * Reread the current getnext() buffer. This allows for changing inodes
761 * other than the current one via ginode()/inodirty()/inoflush().
762 *
763 * Just reuses all the interesting variables that getnextinode() set up
764 * last time it was called. This shouldn't get called often, so we don't
765 * try to figure out if the caller's actually touched an inode in the
766 * range we have cached. There could have been an arbitrary number of
767 * them, after all.
768 */
769 struct dinode *
getnextrefresh(void)770 getnextrefresh(void)
771 {
772 if (inodebuf == NULL) {
773 return (NULL);
774 }
775
776 inoflush();
777 (void) fsck_bread(fsreadfd, (caddr_t)inodebuf, currentdblk, lastsize);
778 return (currentinode);
779 }
780
781 void
resetinodebuf(void)782 resetinodebuf(void)
783 {
784 startinum = 0;
785 nextino = 0;
786 lastinum = 0;
787 readcnt = 0;
788 inobufsize = blkroundup(&sblock, INOBUFSIZE);
789 fullcnt = inobufsize / sizeof (struct dinode);
790 readpercg = sblock.fs_ipg / fullcnt;
791 partialcnt = sblock.fs_ipg % fullcnt;
792 partialsize = partialcnt * sizeof (struct dinode);
793 if (partialcnt != 0) {
794 readpercg++;
795 } else {
796 partialcnt = fullcnt;
797 partialsize = inobufsize;
798 }
799 if (inodebuf == NULL &&
800 (inodebuf = (struct dinode *)malloc((unsigned)inobufsize)) == NULL)
801 errexit("Cannot allocate space for inode buffer\n");
802 while (nextino < UFSROOTINO)
803 (void) getnextinode(nextino);
804 }
805
806 void
freeinodebuf(void)807 freeinodebuf(void)
808 {
809 if (inodebuf != NULL) {
810 free((void *)inodebuf);
811 }
812 inodebuf = NULL;
813 }
814
815 /*
816 * Routines to maintain information about directory inodes.
817 * This is built during the first pass and used during the
818 * second and third passes.
819 *
820 * Enter inodes into the cache.
821 */
822 void
cacheino(struct dinode * dp,fsck_ino_t inum)823 cacheino(struct dinode *dp, fsck_ino_t inum)
824 {
825 struct inoinfo *inp;
826 struct inoinfo **inpp;
827 uint_t blks;
828
829 blks = NDADDR + NIADDR;
830 inp = (struct inoinfo *)
831 malloc(sizeof (*inp) + (blks - 1) * sizeof (daddr32_t));
832 if (inp == NULL)
833 errexit("Cannot increase directory list\n");
834 init_inoinfo(inp, dp, inum); /* doesn't touch i_nextlist or i_number */
835 inpp = &inphead[inum % numdirs];
836 inp->i_nextlist = *inpp;
837 *inpp = inp;
838 inp->i_number = inum;
839 if (inplast == listmax) {
840 listmax += 100;
841 inpsort = (struct inoinfo **)realloc((void *)inpsort,
842 (unsigned)listmax * sizeof (struct inoinfo *));
843 if (inpsort == NULL)
844 errexit("cannot increase directory list");
845 }
846 inpsort[inplast++] = inp;
847 }
848
849 /*
850 * Look up an inode cache structure.
851 */
852 struct inoinfo *
getinoinfo(fsck_ino_t inum)853 getinoinfo(fsck_ino_t inum)
854 {
855 struct inoinfo *inp;
856
857 inp = search_cache(inphead[inum % numdirs], inum);
858 return (inp);
859 }
860
861 /*
862 * Determine whether inode is in cache.
863 */
864 int
inocached(fsck_ino_t inum)865 inocached(fsck_ino_t inum)
866 {
867 return (search_cache(inphead[inum % numdirs], inum) != NULL);
868 }
869
870 /*
871 * Clean up all the inode cache structure.
872 */
873 void
inocleanup(void)874 inocleanup(void)
875 {
876 struct inoinfo **inpp;
877
878 if (inphead == NULL)
879 return;
880 for (inpp = &inpsort[inplast - 1]; inpp >= inpsort; inpp--) {
881 free((void *)(*inpp));
882 }
883 free((void *)inphead);
884 free((void *)inpsort);
885 inphead = inpsort = NULL;
886 }
887
888 /*
889 * Routines to maintain information about acl inodes.
890 * This is built during the first pass and used during the
891 * second and third passes.
892 *
893 * Enter acl inodes into the cache.
894 */
895 void
cacheacl(struct dinode * dp,fsck_ino_t inum)896 cacheacl(struct dinode *dp, fsck_ino_t inum)
897 {
898 struct inoinfo *aclp;
899 struct inoinfo **aclpp;
900 uint_t blks;
901
902 blks = NDADDR + NIADDR;
903 aclp = (struct inoinfo *)
904 malloc(sizeof (*aclp) + (blks - 1) * sizeof (daddr32_t));
905 if (aclp == NULL)
906 return;
907 aclpp = &aclphead[inum % numacls];
908 aclp->i_nextlist = *aclpp;
909 *aclpp = aclp;
910 aclp->i_number = inum;
911 aclp->i_isize = (offset_t)dp->di_size;
912 aclp->i_blkssize = (size_t)(blks * sizeof (daddr32_t));
913 (void) memmove(&aclp->i_blks[0], &dp->di_db[0], aclp->i_blkssize);
914 if (aclplast == aclmax) {
915 aclmax += 100;
916 aclpsort = (struct inoinfo **)realloc((char *)aclpsort,
917 (unsigned)aclmax * sizeof (struct inoinfo *));
918 if (aclpsort == NULL)
919 errexit("cannot increase acl list");
920 }
921 aclpsort[aclplast++] = aclp;
922 }
923
924
925 /*
926 * Generic cache search function.
927 * ROOT is the first entry in a hash chain (the caller is expected
928 * to have done the initial bucket lookup). KEY is what's being
929 * searched for.
930 *
931 * Returns a pointer to the entry if it is found, NULL otherwise.
932 */
933 static struct inoinfo *
search_cache(struct inoinfo * element,fsck_ino_t key)934 search_cache(struct inoinfo *element, fsck_ino_t key)
935 {
936 while (element != NULL) {
937 if (element->i_number == key)
938 break;
939 element = element->i_nextlist;
940 }
941
942 return (element);
943 }
944
945 void
inodirty(void)946 inodirty(void)
947 {
948 dirty(pbp);
949 }
950
951 static void
inoflush(void)952 inoflush(void)
953 {
954 if (pbp != NULL)
955 flush(fswritefd, pbp);
956 }
957
958 /*
959 * Interactive wrapper for freeino(), for those times when we're
960 * not sure if we should throw something away.
961 */
962 void
clri(struct inodesc * idesc,char * type,int verbose,int corrupting)963 clri(struct inodesc *idesc, char *type, int verbose, int corrupting)
964 {
965 int need_parent;
966 struct dinode *dp;
967
968 if (statemap[idesc->id_number] == USTATE)
969 return;
970
971 dp = ginode(idesc->id_number);
972 if (verbose == CLRI_VERBOSE) {
973 pwarn("%s %s", type, file_id(idesc->id_number, dp->di_mode));
974 pinode(idesc->id_number);
975 }
976 if (preen || (reply("CLEAR") == 1)) {
977 need_parent = (corrupting == CLRI_NOP_OK) ?
978 TI_NOPARENT : TI_PARENT;
979 freeino(idesc->id_number, need_parent);
980 if (preen)
981 (void) printf(" (CLEARED)\n");
982 remove_orphan_dir(idesc->id_number);
983 } else if (corrupting == CLRI_NOP_CORRUPT) {
984 iscorrupt = 1;
985 }
986 (void) printf("\n");
987 }
988
989 /*
990 * Find the directory entry for the inode noted in id_parent (which is
991 * not necessarily the parent of anything, we're just using a convenient
992 * field.
993 */
994 int
findname(struct inodesc * idesc)995 findname(struct inodesc *idesc)
996 {
997 struct direct *dirp = idesc->id_dirp;
998
999 if (dirp->d_ino != idesc->id_parent)
1000 return (KEEPON);
1001 (void) memmove(idesc->id_name, dirp->d_name,
1002 MIN(dirp->d_namlen, MAXNAMLEN) + 1);
1003 return (STOP|FOUND);
1004 }
1005
1006 /*
1007 * Find the inode number associated with the given name.
1008 */
1009 int
findino(struct inodesc * idesc)1010 findino(struct inodesc *idesc)
1011 {
1012 struct direct *dirp = idesc->id_dirp;
1013
1014 if (dirp->d_ino == 0)
1015 return (KEEPON);
1016 if (strcmp(dirp->d_name, idesc->id_name) == 0 &&
1017 dirp->d_ino >= UFSROOTINO && dirp->d_ino <= maxino) {
1018 idesc->id_parent = dirp->d_ino;
1019 return (STOP|FOUND);
1020 }
1021 return (KEEPON);
1022 }
1023
1024 int
cleardirentry(fsck_ino_t parentdir,fsck_ino_t target)1025 cleardirentry(fsck_ino_t parentdir, fsck_ino_t target)
1026 {
1027 struct inodesc idesc;
1028 struct dinode *dp;
1029
1030 dp = ginode(parentdir);
1031 init_inodesc(&idesc);
1032 idesc.id_func = clearanentry;
1033 idesc.id_parent = target;
1034 idesc.id_type = DATA;
1035 idesc.id_fix = NOFIX;
1036 return (ckinode(dp, &idesc, CKI_TRAVERSE));
1037 }
1038
1039 static int
clearanentry(struct inodesc * idesc)1040 clearanentry(struct inodesc *idesc)
1041 {
1042 struct direct *dirp = idesc->id_dirp;
1043
1044 if (dirp->d_ino != idesc->id_parent || idesc->id_entryno < 2) {
1045 idesc->id_entryno++;
1046 return (KEEPON);
1047 }
1048 dirp->d_ino = 0;
1049 return (STOP|FOUND|ALTERED);
1050 }
1051
1052 void
pinode(fsck_ino_t ino)1053 pinode(fsck_ino_t ino)
1054 {
1055 struct dinode *dp;
1056
1057 (void) printf(" I=%lu ", (ulong_t)ino);
1058 if (ino < UFSROOTINO || ino > maxino)
1059 return;
1060 dp = ginode(ino);
1061 pdinode(dp);
1062 }
1063
1064 static void
pdinode(struct dinode * dp)1065 pdinode(struct dinode *dp)
1066 {
1067 char *p;
1068 struct passwd *pw;
1069 time_t t;
1070
1071 (void) printf(" OWNER=");
1072 if ((pw = getpwuid((int)dp->di_uid)) != 0)
1073 (void) printf("%s ", pw->pw_name);
1074 else
1075 (void) printf("%lu ", (ulong_t)dp->di_uid);
1076 (void) printf("MODE=%o\n", dp->di_mode);
1077 if (preen)
1078 (void) printf("%s: ", devname);
1079 (void) printf("SIZE=%lld ", (longlong_t)dp->di_size);
1080
1081 /* ctime() ignores LOCALE, so this is safe */
1082 t = (time_t)dp->di_mtime;
1083 p = ctime(&t);
1084 (void) printf("MTIME=%12.12s %4.4s ", p + 4, p + 20);
1085 }
1086
1087 void
blkerror(fsck_ino_t ino,char * type,daddr32_t blk,daddr32_t lbn)1088 blkerror(fsck_ino_t ino, char *type, daddr32_t blk, daddr32_t lbn)
1089 {
1090 pfatal("FRAGMENT %d %s I=%u LFN %d", blk, type, ino, lbn);
1091 (void) printf("\n");
1092
1093 switch (statemap[ino] & ~INDELAYD) {
1094
1095 case FSTATE:
1096 case FZLINK:
1097 statemap[ino] = FCLEAR;
1098 return;
1099
1100 case DFOUND:
1101 case DSTATE:
1102 case DZLINK:
1103 statemap[ino] = DCLEAR;
1104 add_orphan_dir(ino);
1105 return;
1106
1107 case SSTATE:
1108 statemap[ino] = SCLEAR;
1109 return;
1110
1111 case FCLEAR:
1112 case DCLEAR:
1113 case SCLEAR:
1114 return;
1115
1116 default:
1117 errexit("BAD STATE 0x%x TO BLKERR\n", statemap[ino]);
1118 /* NOTREACHED */
1119 }
1120 }
1121
1122 /*
1123 * allocate an unused inode
1124 */
1125 fsck_ino_t
allocino(fsck_ino_t request,int type)1126 allocino(fsck_ino_t request, int type)
1127 {
1128 fsck_ino_t ino;
1129 struct dinode *dp;
1130 struct cg *cgp = &cgrp;
1131 int cg;
1132 time_t t;
1133 caddr_t err;
1134
1135 if (debug && (request != 0) && (request != UFSROOTINO))
1136 errexit("assertion failed: allocino() asked for "
1137 "inode %d instead of 0 or %d",
1138 (int)request, (int)UFSROOTINO);
1139
1140 /*
1141 * We know that we're only going to get requests for UFSROOTINO
1142 * or 0. If UFSROOTINO is wanted, then it better be available
1143 * because our caller is trying to recreate the root directory.
1144 * If we're asked for 0, then which one we return doesn't matter.
1145 * We know that inodes 0 and 1 are never valid to return, so we
1146 * the start at the lowest-legal inode number.
1147 *
1148 * If we got a request for UFSROOTINO, then request != 0, and
1149 * this pair of conditionals is the only place that treats
1150 * UFSROOTINO specially.
1151 */
1152 if (request == 0)
1153 request = UFSROOTINO;
1154 else if (statemap[request] != USTATE)
1155 return (0);
1156
1157 /*
1158 * Doesn't do wrapping, since we know we started at
1159 * the smallest inode.
1160 */
1161 for (ino = request; ino < maxino; ino++)
1162 if (statemap[ino] == USTATE)
1163 break;
1164 if (ino == maxino)
1165 return (0);
1166
1167 /*
1168 * In pass5, we'll calculate the bitmaps and counts all again from
1169 * scratch and do a comparison, but for that to work the cg has
1170 * to know what in-memory changes we've made to it. If we have
1171 * trouble reading the cg, cg_sanity() should kick it out so
1172 * we can skip explicit i/o error checking here.
1173 */
1174 cg = itog(&sblock, ino);
1175 (void) getblk(&cgblk, cgtod(&sblock, cg), (size_t)sblock.fs_cgsize);
1176 err = cg_sanity(cgp, cg);
1177 if (err != NULL) {
1178 pfatal("CG %d: %s\n", cg, err);
1179 free((void *)err);
1180 if (reply("REPAIR") == 0)
1181 errexit("Program terminated.");
1182 fix_cg(cgp, cg);
1183 }
1184 setbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1185 cgp->cg_cs.cs_nifree--;
1186 cgdirty();
1187
1188 if (lastino < ino)
1189 lastino = ino;
1190
1191 /*
1192 * Don't currently support IFATTRDIR or any of the other
1193 * types, as they aren't needed.
1194 */
1195 switch (type & IFMT) {
1196 case IFDIR:
1197 statemap[ino] = DSTATE;
1198 cgp->cg_cs.cs_ndir++;
1199 break;
1200 case IFREG:
1201 case IFLNK:
1202 statemap[ino] = FSTATE;
1203 break;
1204 default:
1205 /*
1206 * Pretend nothing ever happened. This clears the
1207 * dirty flag, among other things.
1208 */
1209 initbarea(&cgblk);
1210 if (debug)
1211 (void) printf("allocino: unknown type 0%o\n",
1212 type & IFMT);
1213 return (0);
1214 }
1215
1216 /*
1217 * We're allocating what should be a completely-unused inode,
1218 * so make sure we don't inherit anything from any previous
1219 * incarnations.
1220 */
1221 dp = ginode(ino);
1222 (void) memset((void *)dp, 0, sizeof (struct dinode));
1223 dp->di_db[0] = allocblk(1);
1224 if (dp->di_db[0] == 0) {
1225 statemap[ino] = USTATE;
1226 return (0);
1227 }
1228 dp->di_mode = (mode_t)type;
1229 (void) time(&t);
1230 dp->di_atime = (time32_t)t;
1231 dp->di_ctime = dp->di_atime;
1232 dp->di_mtime = dp->di_ctime;
1233 dp->di_size = (u_offset_t)sblock.fs_fsize;
1234 dp->di_blocks = btodb(sblock.fs_fsize);
1235 n_files++;
1236 inodirty();
1237 return (ino);
1238 }
1239
1240 /*
1241 * Release some or all of the blocks of an inode.
1242 * Only truncates down. Assumes new_length is appropriately aligned
1243 * to a block boundary (or a directory block boundary, if it's a
1244 * directory).
1245 *
1246 * If this is a directory, discard all of its contents first, so
1247 * we don't create a bunch of orphans that would need another fsck
1248 * run to clean up.
1249 *
1250 * Even if truncating to zero length, the inode remains allocated.
1251 */
1252 void
truncino(fsck_ino_t ino,offset_t new_length,int update)1253 truncino(fsck_ino_t ino, offset_t new_length, int update)
1254 {
1255 struct inodesc idesc;
1256 struct inoinfo *iip;
1257 struct dinode *dp;
1258 fsck_ino_t parent;
1259 mode_t mode;
1260 caddr_t message;
1261 int isdir, islink;
1262 int ilevel, dblk;
1263
1264 dp = ginode(ino);
1265 mode = (dp->di_mode & IFMT);
1266 isdir = (mode == IFDIR) || (mode == IFATTRDIR);
1267 islink = (mode == IFLNK);
1268
1269 if (isdir) {
1270 /*
1271 * Go with the parent we found by chasing references,
1272 * if we've gotten that far. Otherwise, use what the
1273 * directory itself claims. If there's no ``..'' entry
1274 * in it, give up trying to get the link counts right.
1275 */
1276 if (update == TI_NOPARENT) {
1277 parent = -1;
1278 } else {
1279 iip = getinoinfo(ino);
1280 if (iip != NULL) {
1281 parent = iip->i_parent;
1282 } else {
1283 parent = lookup_dotdot_ino(ino);
1284 if (parent != 0) {
1285 /*
1286 * Make sure that the claimed
1287 * parent actually has a
1288 * reference to us.
1289 */
1290 dp = ginode(parent);
1291 idesc.id_name = lfname;
1292 idesc.id_type = DATA;
1293 idesc.id_func = findino;
1294 idesc.id_number = ino;
1295 idesc.id_fix = DONTKNOW;
1296 if ((ckinode(dp, &idesc,
1297 CKI_TRAVERSE) & FOUND) == 0)
1298 parent = 0;
1299 }
1300 }
1301 }
1302
1303 mark_delayed_inodes(ino, numfrags(&sblock, new_length));
1304 if (parent > 0) {
1305 dp = ginode(parent);
1306 LINK_RANGE(message, dp->di_nlink, -1);
1307 if (message != NULL) {
1308 LINK_CLEAR(message, parent, dp->di_mode,
1309 &idesc);
1310 if (statemap[parent] == USTATE)
1311 goto no_parent_update;
1312 }
1313 TRACK_LNCNTP(parent, lncntp[parent]--);
1314 } else if ((mode == IFDIR) && (parent == 0)) {
1315 /*
1316 * Currently don't have a good way to
1317 * handle this, so throw up our hands.
1318 * However, we know that we can still
1319 * do some good if we continue, so
1320 * don't actually exit yet.
1321 *
1322 * We don't do it for attrdirs,
1323 * because there aren't link counts
1324 * between them and their parents.
1325 */
1326 pwarn("Could not determine former parent of "
1327 "inode %d, link counts are possibly\n"
1328 "incorrect. Please rerun fsck(1M) to "
1329 "correct this.\n",
1330 ino);
1331 iscorrupt = 1;
1332 }
1333 /*
1334 * ...else if it's a directory with parent == -1, then
1335 * we've not gotten far enough to know connectivity,
1336 * and it'll get handled automatically later.
1337 */
1338 }
1339
1340 no_parent_update:
1341 init_inodesc(&idesc);
1342 idesc.id_type = ADDR;
1343 idesc.id_func = pass4check;
1344 idesc.id_number = ino;
1345 idesc.id_fix = DONTKNOW;
1346 idesc.id_truncto = howmany(new_length, sblock.fs_bsize);
1347 dp = ginode(ino);
1348 if (!islink && ckinode(dp, &idesc, CKI_TRUNCATE) & ALTERED)
1349 inodirty();
1350
1351 /*
1352 * This has to be done after ckinode(), so that all of
1353 * the fragments get visited. Note that we assume we're
1354 * always truncating to a block boundary, rather than a
1355 * fragment boundary.
1356 */
1357 dp = ginode(ino);
1358 dp->di_size = new_length;
1359
1360 /*
1361 * Clear now-obsolete pointers.
1362 */
1363 for (dblk = idesc.id_truncto + 1; dblk < NDADDR; dblk++) {
1364 dp->di_db[dblk] = 0;
1365 }
1366
1367 ilevel = get_indir_offsets(-1, idesc.id_truncto, NULL, NULL);
1368 for (ilevel++; ilevel < NIADDR; ilevel++) {
1369 dp->di_ib[ilevel] = 0;
1370 }
1371
1372 inodirty();
1373 }
1374
1375 /*
1376 * Release an inode's resources, then release the inode itself.
1377 */
1378 void
freeino(fsck_ino_t ino,int update_parent)1379 freeino(fsck_ino_t ino, int update_parent)
1380 {
1381 int cg;
1382 struct dinode *dp;
1383 struct cg *cgp;
1384
1385 n_files--;
1386 dp = ginode(ino);
1387 /*
1388 * We need to make sure that the file is really a large file.
1389 * Everything bigger than UFS_MAXOFFSET_T is treated as a file with
1390 * negative size, which shall be cleared. (see verify_inode() in
1391 * pass1.c)
1392 */
1393 if (dp->di_size > (u_offset_t)MAXOFF_T &&
1394 dp->di_size <= (u_offset_t)UFS_MAXOFFSET_T &&
1395 ftypeok(dp) &&
1396 (dp->di_mode & IFMT) != IFBLK &&
1397 (dp->di_mode & IFMT) != IFCHR) {
1398 largefile_count--;
1399 }
1400 truncino(ino, 0, update_parent);
1401
1402 dp = ginode(ino);
1403 if ((dp->di_mode & IFMT) == IFATTRDIR) {
1404 clearshadow(ino, &attrclientinfo);
1405 dp = ginode(ino);
1406 }
1407
1408 clearinode(dp);
1409 inodirty();
1410 statemap[ino] = USTATE;
1411
1412 /*
1413 * Keep the disk in sync with us so that pass5 doesn't get
1414 * upset about spurious inconsistencies.
1415 */
1416 cg = itog(&sblock, ino);
1417 (void) getblk(&cgblk, (diskaddr_t)cgtod(&sblock, cg),
1418 (size_t)sblock.fs_cgsize);
1419 cgp = cgblk.b_un.b_cg;
1420 clrbit(cg_inosused(cgp), ino % sblock.fs_ipg);
1421 cgp->cg_cs.cs_nifree += 1;
1422 cgdirty();
1423 sblock.fs_cstotal.cs_nifree += 1;
1424 sbdirty();
1425 }
1426
1427 void
init_inoinfo(struct inoinfo * inp,struct dinode * dp,fsck_ino_t inum)1428 init_inoinfo(struct inoinfo *inp, struct dinode *dp, fsck_ino_t inum)
1429 {
1430 inp->i_parent = ((inum == UFSROOTINO) ? UFSROOTINO : (fsck_ino_t)0);
1431 inp->i_dotdot = (fsck_ino_t)0;
1432 inp->i_isize = (offset_t)dp->di_size;
1433 inp->i_blkssize = (NDADDR + NIADDR) * sizeof (daddr32_t);
1434 inp->i_extattr = dp->di_oeftflag;
1435 (void) memmove((void *)&inp->i_blks[0], (void *)&dp->di_db[0],
1436 inp->i_blkssize);
1437 }
1438
1439 /*
1440 * Return the inode number in the ".." entry of the provided
1441 * directory inode.
1442 */
1443 static int
lookup_dotdot_ino(fsck_ino_t ino)1444 lookup_dotdot_ino(fsck_ino_t ino)
1445 {
1446 struct inodesc idesc;
1447
1448 init_inodesc(&idesc);
1449 idesc.id_type = DATA;
1450 idesc.id_func = findino;
1451 idesc.id_name = "..";
1452 idesc.id_number = ino;
1453 idesc.id_fix = NOFIX;
1454
1455 if ((ckinode(ginode(ino), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1456 return (idesc.id_parent);
1457 }
1458
1459 return (0);
1460 }
1461
1462 /*
1463 * Convenience wrapper around ckinode(findino()).
1464 */
1465 int
lookup_named_ino(fsck_ino_t dir,caddr_t name)1466 lookup_named_ino(fsck_ino_t dir, caddr_t name)
1467 {
1468 struct inodesc idesc;
1469
1470 init_inodesc(&idesc);
1471 idesc.id_type = DATA;
1472 idesc.id_func = findino;
1473 idesc.id_name = name;
1474 idesc.id_number = dir;
1475 idesc.id_fix = NOFIX;
1476
1477 if ((ckinode(ginode(dir), &idesc, CKI_TRAVERSE) & FOUND) != 0) {
1478 return (idesc.id_parent);
1479 }
1480
1481 return (0);
1482 }
1483
1484 /*
1485 * Marks inodes that are being orphaned and might need to be reconnected
1486 * by pass4(). The inode we're traversing is the directory whose
1487 * contents will be reconnected later. id_parent is the lfn at which
1488 * to start looking at said contents.
1489 */
1490 static int
mark_a_delayed_inode(struct inodesc * idesc)1491 mark_a_delayed_inode(struct inodesc *idesc)
1492 {
1493 struct direct *dirp = idesc->id_dirp;
1494
1495 if (idesc->id_lbn < idesc->id_parent) {
1496 return (KEEPON);
1497 }
1498
1499 if (dirp->d_ino != 0 &&
1500 strcmp(dirp->d_name, ".") != 0 &&
1501 strcmp(dirp->d_name, "..") != 0) {
1502 statemap[dirp->d_ino] &= ~INFOUND;
1503 statemap[dirp->d_ino] |= INDELAYD;
1504 }
1505
1506 return (KEEPON);
1507 }
1508
1509 static void
mark_delayed_inodes(fsck_ino_t ino,daddr32_t first_lfn)1510 mark_delayed_inodes(fsck_ino_t ino, daddr32_t first_lfn)
1511 {
1512 struct dinode *dp;
1513 struct inodesc idelayed;
1514
1515 init_inodesc(&idelayed);
1516 idelayed.id_number = ino;
1517 idelayed.id_type = DATA;
1518 idelayed.id_fix = NOFIX;
1519 idelayed.id_func = mark_a_delayed_inode;
1520 idelayed.id_parent = first_lfn;
1521 idelayed.id_entryno = 2;
1522
1523 dp = ginode(ino);
1524 (void) ckinode(dp, &idelayed, CKI_TRAVERSE);
1525 }
1526
1527 /*
1528 * Clear the i_oeftflag/extended attribute pointer from INO.
1529 */
1530 void
clearattrref(fsck_ino_t ino)1531 clearattrref(fsck_ino_t ino)
1532 {
1533 struct dinode *dp;
1534
1535 dp = ginode(ino);
1536 if (debug) {
1537 if (dp->di_oeftflag == 0)
1538 (void) printf("clearattref: no attr to clear on %d\n",
1539 ino);
1540 }
1541
1542 dp->di_oeftflag = 0;
1543 inodirty();
1544 }
1545