/*
 * xref: /titanic_41/usr/src/cmd/fs.d/ufs/fsck/pass5.c
 * (revision 6a634c9dca3093f3922e4b7ab826d7bdf17bf78e)
 */
/*
 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved  	*/

/*
 * Copyright (c) 1980, 1986, 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms are permitted
 * provided that: (1) source distributions retain this entire copyright
 * notice and comment, and (2) distributions including binaries display
 * the following acknowledgement:  ``This product includes software
 * developed by the University of California, Berkeley and its contributors''
 * in the documentation or other materials provided with the distribution
 * and in all advertising materials mentioning features or use of this
 * software. Neither the name of the University nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <time.h>
#include <sys/param.h>
#include <sys/mntent.h>
#include <sys/fs/ufs_fs.h>
#include <sys/vnode.h>
#include <sys/fs/ufs_inode.h>
#include "fsck.h"

38 static int check_maps(uchar_t *, uchar_t *, int, int, char *, int, int);
39 
40 void
pass5(void)41 pass5(void)
42 {
43 	caddr_t err;
44 	int32_t c, blk, frags;
45 	size_t	basesize, sumsize, mapsize;
46 	int excessdirs;
47 	int inomapsize, blkmapsize;
48 	int update_csums, update_bitmaps;
49 	int bad_csum_sb, bad_csum_cg, bad_cgblks_cg, bad_cgblktot_cg;
50 	struct fs *fs = &sblock;
51 	struct cg *cg = &cgrp;
52 	diskaddr_t dbase, dmax;
53 	diskaddr_t d;
54 	uint64_t i, j;
55 	struct csum *cs;
56 	struct csum backup_cs;
57 	time_t now;
58 	struct csum cstotal;
59 	struct inodesc idesc;
60 	union {				/* keep lint happy about alignment */
61 		struct cg cg;		/* the rest of buf has the bitmaps */
62 		char buf[MAXBSIZE];
63 	} u;
64 	caddr_t buf = u.buf;
65 	struct cg *newcg = &u.cg;
66 
67 	(void) memset((void *)buf, 0, sizeof (u.buf));
68 	newcg->cg_niblk = fs->fs_ipg;
69 
70 	if (fs->fs_postblformat != FS_DYNAMICPOSTBLFMT) {
71 		pfatal("UNSUPPORTED ROTATIONAL TABLE FORMAT %d\n",
72 		    fs->fs_postblformat);
73 		errexit("Program terminated.");
74 		/* NOTREACHED */
75 	}
76 
77 	/* LINTED this subtraction can't overflow and is int32-aligned */
78 	basesize = &newcg->cg_space[0] - (uchar_t *)newcg;
79 
80 	/*
81 	 * We reserve the space for the old rotation summary
82 	 * tables for the benefit of old kernels, but do not
83 	 * maintain them in modern kernels. In time, they could
84 	 * theoretically go away, if we wanted to deal with
85 	 * changing the on-disk format.
86 	 */
87 
88 	/*
89 	 * Note that we don't use any of the cg_*() macros until
90 	 * after cg_sanity() has approved of what we've got.
91 	 */
92 	newcg->cg_btotoff = basesize;
93 	newcg->cg_boff = newcg->cg_btotoff + fs->fs_cpg * sizeof (daddr32_t);
94 	newcg->cg_iusedoff = newcg->cg_boff +
95 	    fs->fs_cpg * fs->fs_nrpos * sizeof (uint16_t);
96 	(void) memset(&newcg->cg_space[0], 0, newcg->cg_iusedoff - basesize);
97 
98 	inomapsize = howmany(fs->fs_ipg, NBBY);
99 	newcg->cg_freeoff = newcg->cg_iusedoff + inomapsize;
100 	blkmapsize = howmany(fs->fs_fpg, NBBY);
101 	newcg->cg_nextfreeoff = newcg->cg_freeoff + blkmapsize;
102 	newcg->cg_magic = CG_MAGIC;
103 
104 	sumsize = newcg->cg_iusedoff - newcg->cg_btotoff;
105 	mapsize = newcg->cg_nextfreeoff - newcg->cg_iusedoff;
106 
107 	init_inodesc(&idesc);
108 	idesc.id_type = ADDR;
109 	(void) memset((void *)&cstotal, 0, sizeof (struct csum));
110 	now = time(NULL);
111 
112 	/*
113 	 * If the last fragments in the file system don't make up a
114 	 * full file system block, mark the bits in the blockmap
115 	 * that correspond to those missing fragments as "allocated",
116 	 * so that the last block doesn't get counted as a free block
117 	 * and those missing fragments don't get counted as free frags.
118 	 */
119 	j = blknum(fs, (uint64_t)fs->fs_size + fs->fs_frag - 1);
120 	for (i = fs->fs_size; i < j; i++)
121 		setbmap(i);
122 
123 	/*
124 	 * The cg summaries are not always updated when using
125 	 * logging.  Since we're really concerned with getting a
126 	 * sane filesystem, rather than in trying to debug UFS
127 	 * corner cases, logically we would just always recompute
128 	 * them.  However, it is disconcerting to users to be asked
129 	 * about updating the summaries when, from their point of
130 	 * view, there's been no indication of a problem up to this
131 	 * point.  So, only do it if we find a discrepancy.
132 	 */
133 	update_csums = -1;
134 	update_bitmaps = 0;
135 	for (c = 0; c < fs->fs_ncg; c++) {
136 		backup_cs = cstotal;
137 
138 		/*
139 		 * cg_sanity() will catch i/o errors for us.
140 		 */
141 		(void) getblk(&cgblk, (diskaddr_t)cgtod(fs, c),
142 		    (size_t)fs->fs_cgsize);
143 		err = cg_sanity(cg, c);
144 		if (err != NULL) {
145 			pfatal("CG %d: %s\n", c, err);
146 			free((void *)err);
147 			if (reply("REPAIR") == 0)
148 				errexit("Program terminated.");
149 			fix_cg(cg, c);
150 		}
151 		/*
152 		 * If the on-disk timestamp is in the future, then it
153 		 * by definition is wrong.  Otherwise, if it's in
154 		 * the past, then use that value so that we don't
155 		 * declare a spurious mismatch.
156 		 */
157 		if (now > cg->cg_time)
158 			newcg->cg_time = cg->cg_time;
159 		else
160 			newcg->cg_time = now;
161 		newcg->cg_cgx = c;
162 		dbase = cgbase(fs, c);
163 		dmax = dbase + fs->fs_fpg;
164 		if (dmax > fs->fs_size)
165 			dmax = fs->fs_size;
166 		newcg->cg_ndblk = dmax - dbase;
167 		if (c == fs->fs_ncg - 1)
168 			newcg->cg_ncyl = fs->fs_ncyl - (fs->fs_cpg * c);
169 		else
170 			newcg->cg_ncyl = fs->fs_cpg;
171 		newcg->cg_niblk = sblock.fs_ipg;
172 		newcg->cg_cs.cs_ndir = 0;
173 		newcg->cg_cs.cs_nffree = 0;
174 		newcg->cg_cs.cs_nbfree = 0;
175 		newcg->cg_cs.cs_nifree = fs->fs_ipg;
176 		if ((cg->cg_rotor >= 0) && (cg->cg_rotor < newcg->cg_ndblk))
177 			newcg->cg_rotor = cg->cg_rotor;
178 		else
179 			newcg->cg_rotor = 0;
180 		if ((cg->cg_frotor >= 0) && (cg->cg_frotor < newcg->cg_ndblk))
181 			newcg->cg_frotor = cg->cg_frotor;
182 		else
183 			newcg->cg_frotor = 0;
184 		if ((cg->cg_irotor >= 0) && (cg->cg_irotor < newcg->cg_niblk))
185 			newcg->cg_irotor = cg->cg_irotor;
186 		else
187 			newcg->cg_irotor = 0;
188 		(void) memset((void *)&newcg->cg_frsum[0], 0,
189 		    sizeof (newcg->cg_frsum));
190 		(void) memset((void *)cg_inosused(newcg), 0, (size_t)mapsize);
191 		/* LINTED macro is int32-aligned per newcg->cg_btotoff above */
192 		(void) memset((void *)&cg_blktot(newcg)[0], 0,
193 		    sumsize + mapsize);
194 		j = fs->fs_ipg * c;
195 		for (i = 0; i < fs->fs_ipg; j++, i++) {
196 			switch (statemap[j] & ~(INORPHAN | INDELAYD)) {
197 
198 			case USTATE:
199 				break;
200 
201 			case DSTATE:
202 			case DCLEAR:
203 			case DFOUND:
204 			case DZLINK:
205 				newcg->cg_cs.cs_ndir++;
206 				/* FALLTHROUGH */
207 
208 			case FSTATE:
209 			case FCLEAR:
210 			case FZLINK:
211 			case SSTATE:
212 			case SCLEAR:
213 				newcg->cg_cs.cs_nifree--;
214 				setbit(cg_inosused(newcg), i);
215 				break;
216 
217 			default:
218 				if (j < UFSROOTINO)
219 					break;
220 				errexit("BAD STATE 0x%x FOR INODE I=%d",
221 				    statemap[j], (int)j);
222 			}
223 		}
224 		if (c == 0) {
225 			for (i = 0; i < UFSROOTINO; i++) {
226 				setbit(cg_inosused(newcg), i);
227 				newcg->cg_cs.cs_nifree--;
228 			}
229 		}
230 		/*
231 		 * Count up what fragments and blocks are free, and
232 		 * reflect the relevant section of blockmap[] into
233 		 * newcg's map.
234 		 */
235 		for (i = 0, d = dbase;
236 		    d < dmax;
237 		    d += fs->fs_frag, i += fs->fs_frag) {
238 			frags = 0;
239 			for (j = 0; j < fs->fs_frag; j++) {
240 				if (testbmap(d + j))
241 					continue;
242 				setbit(cg_blksfree(newcg), i + j);
243 				frags++;
244 			}
245 			if (frags == fs->fs_frag) {
246 				newcg->cg_cs.cs_nbfree++;
247 				j = cbtocylno(fs, i);
248 				/* LINTED macro is int32-aligned per above */
249 				cg_blktot(newcg)[j]++;
250 				/* LINTED cg_blks(newcg) is aligned */
251 				cg_blks(fs, newcg, j)[cbtorpos(fs, i)]++;
252 			} else if (frags > 0) {
253 				newcg->cg_cs.cs_nffree += frags;
254 				blk = blkmap(fs, cg_blksfree(newcg), i);
255 				fragacct(fs, blk, newcg->cg_frsum, 1);
256 			}
257 		}
258 		cstotal.cs_nffree += newcg->cg_cs.cs_nffree;
259 		cstotal.cs_nbfree += newcg->cg_cs.cs_nbfree;
260 		cstotal.cs_nifree += newcg->cg_cs.cs_nifree;
261 		cstotal.cs_ndir += newcg->cg_cs.cs_ndir;
262 
263 		/*
264 		 * Note that, just like the kernel, we dynamically
265 		 * allocated an array to hold the csums and stuffed
266 		 * the pointer into the in-core superblock's fs_u.fs_csp
267 		 * field.  This means that the fs_u field contains a
268 		 * random value when the disk version is examined, but
269 		 * fs_cs() gives us a valid pointer nonetheless.
270 		 * We need to compare the recalculated summaries to
271 		 * both the superblock version and the on disk version.
272 		 * If either is bad, copy the calculated version over
273 		 * the corrupt values.
274 		 */
275 
276 		cs = &fs->fs_cs(fs, c);
277 		bad_csum_sb = (memcmp((void *)cs, (void *)&newcg->cg_cs,
278 		    sizeof (*cs)) != 0);
279 
280 		bad_csum_cg = (memcmp((void *)&cg->cg_cs, (void *)&newcg->cg_cs,
281 		    sizeof (struct csum)) != 0);
282 
283 		/*
284 		 * Has the user told us what to do yet?  If not, find out.
285 		 */
286 		if ((bad_csum_sb || bad_csum_cg) && (update_csums == -1)) {
287 			if (preen) {
288 				update_csums = 1;
289 				(void) printf("CORRECTING BAD CG SUMMARIES"
290 				    " FOR CG %d\n", c);
291 			} else if (update_csums == -1) {
292 				update_csums = (reply(
293 				    "CORRECT BAD CG SUMMARIES FOR CG %d",
294 				    c) == 1);
295 			}
296 		}
297 
298 		if (bad_csum_sb && (update_csums == 1)) {
299 			(void) memmove((void *)cs, (void *)&newcg->cg_cs,
300 			    sizeof (*cs));
301 			sbdirty();
302 			(void) printf("CORRECTED SUPERBLOCK SUMMARIES FOR"
303 			    " CG %d\n", c);
304 		}
305 
306 		if (bad_csum_cg && (update_csums == 1)) {
307 			(void) memmove((void *)cg, (void *)newcg,
308 			    (size_t)basesize);
309 			/* LINTED per cg_sanity() */
310 			(void) memmove((void *)&cg_blktot(cg)[0],
311 			    /* LINTED macro aligned as above */
312 			    (void *)&cg_blktot(newcg)[0], sumsize);
313 			cgdirty();
314 			(void) printf("CORRECTED SUMMARIES FOR CG %d\n", c);
315 		}
316 
317 		excessdirs = cg->cg_cs.cs_ndir - newcg->cg_cs.cs_ndir;
318 		if (excessdirs < 0) {
319 			pfatal("LOST %d DIRECTORIES IN CG %d\n",
320 			    -excessdirs, c);
321 			excessdirs = 0;
322 		}
323 		if (excessdirs > 0) {
324 			if (check_maps((uchar_t *)cg_inosused(newcg),
325 			    (uchar_t *)cg_inosused(cg), inomapsize,
326 			    cg->cg_cgx * fs->fs_ipg, "DIR", 0, excessdirs)) {
327 				if (!verbose)
328 					(void) printf("DIR BITMAP WRONG ");
329 				if (preen || update_bitmaps ||
330 				    reply("FIX") == 1) {
331 					(void) memmove((void *)cg_inosused(cg),
332 					    (void *)cg_inosused(newcg),
333 					    inomapsize);
334 					cgdirty();
335 					if (preen ||
336 					    (!verbose && update_bitmaps))
337 						(void) printf("(CORRECTED)\n");
338 					update_bitmaps = 1;
339 				}
340 			}
341 		}
342 
343 		if (check_maps((uchar_t *)cg_inosused(newcg),
344 		    (uchar_t *)cg_inosused(cg), inomapsize,
345 		    cg->cg_cgx * fs->fs_ipg, "FILE", excessdirs, fs->fs_ipg)) {
346 			if (!verbose)
347 				(void) printf("FILE BITMAP WRONG ");
348 			if (preen || update_bitmaps || reply("FIX") == 1) {
349 				(void) memmove((void *)cg_inosused(cg),
350 				    (void *)cg_inosused(newcg), inomapsize);
351 				cgdirty();
352 				if (preen ||
353 				    (!verbose && update_bitmaps))
354 					(void) printf("(CORRECTED)\n");
355 				update_bitmaps = 1;
356 			}
357 		}
358 
359 		if (check_maps((uchar_t *)cg_blksfree(cg),
360 		    (uchar_t *)cg_blksfree(newcg), blkmapsize,
361 		    cg->cg_cgx * fs->fs_fpg, "FRAG", 0, fs->fs_fpg)) {
362 			if (!verbose)
363 				(void) printf("FRAG BITMAP WRONG ");
364 			if (preen || update_bitmaps || reply("FIX") == 1) {
365 				(void) memmove((void *)cg_blksfree(cg),
366 				    (void *)cg_blksfree(newcg), blkmapsize);
367 				cgdirty();
368 				if (preen ||
369 				    (!verbose && update_bitmaps))
370 					(void) printf("(CORRECTED)\n");
371 				update_bitmaps = 1;
372 			}
373 		}
374 
375 		bad_cgblks_cg = (memcmp((void *)&cg_blks(fs, cg, 0)[0],
376 		    (void *)&cg_blks(fs, newcg, 0)[0],
377 		    fs->fs_cpg * fs->fs_nrpos * sizeof (int16_t)) != 0);
378 
379 		if (bad_cgblks_cg) {
380 			if (!verbose)
381 				(void) printf("ROTATIONAL POSITIONS "
382 				    "BLOCK COUNT WRONG ");
383 			if (preen || update_bitmaps || reply("FIX") == 1) {
384 				(void) memmove((void *)&cg_blks(fs, cg, 0)[0],
385 				    (void *)&cg_blks(fs, newcg, 0)[0],
386 				    fs->fs_cpg * fs->fs_nrpos *
387 				    sizeof (int16_t));
388 				cgdirty();
389 				if (preen ||
390 				    (!verbose && update_bitmaps))
391 					(void) printf("(CORRECTED)\n");
392 				update_bitmaps = 1;
393 			}
394 		}
395 
396 		bad_cgblktot_cg = (memcmp((void *)&cg_blktot(cg)[0],
397 		    (void *)&cg_blktot(newcg)[0],
398 		    fs->fs_cpg * sizeof (int32_t)) != 0);
399 
400 		if (bad_cgblktot_cg) {
401 			if (!verbose)
402 				(void) printf("ROTATIONAL POSITIONS "
403 				    "BLOCK TOTAL WRONG ");
404 			if (preen || update_bitmaps || reply("FIX") == 1) {
405 				(void) memmove((void *)&cg_blktot(cg)[0],
406 				    (void *)&cg_blktot(newcg)[0],
407 				    fs->fs_cpg * sizeof (int32_t));
408 				cgdirty();
409 				if (preen ||
410 				    (!verbose && update_bitmaps))
411 					(void) printf("(CORRECTED)\n");
412 				update_bitmaps = 1;
413 			}
414 		}
415 
416 		/*
417 		 * Fixing one set of problems often shows up more in the
418 		 * same cg.  Just to make sure, go back and check it
419 		 * again if we found something this time through.
420 		 */
421 		if (cgisdirty()) {
422 			cgflush();
423 			cstotal = backup_cs;
424 			c--;
425 		}
426 	}
427 
428 	if ((fflag || !(islog && islogok)) &&
429 	    (memcmp((void *)&cstotal, (void *)&fs->fs_cstotal,
430 	    sizeof (struct csum)) != 0)) {
431 		if (dofix(&idesc, "CORRECT GLOBAL SUMMARY")) {
432 			(void) memmove((void *)&fs->fs_cstotal,
433 			    (void *)&cstotal, sizeof (struct csum));
434 			fs->fs_ronly = 0;
435 			fs->fs_fmod = 0;
436 			sbdirty();
437 		} else {
438 			iscorrupt = 1;
439 		}
440 	}
441 }
442 
443 /*
444  * Compare two allocation bitmaps, reporting any discrepancies.
445  *
446  * If a mismatch is found, if the bit is set in map1, it's considered
447  * to be an indication that the corresponding resource is supposed
448  * to be free, but isn't.  Otherwise, it's considered marked as allocated
449  * but not found to be so.  In other words, if the two maps being compared
450  * use a set bit to indicate something is free, pass the on-disk map
451  * first.  Otherwise, pass the calculated map first.
452  */
453 static int
check_maps(uchar_t * map1,uchar_t * map2,int mapsize,int startvalue,char * name,int skip,int limit)454 check_maps(
455 	uchar_t *map1,	/* map of claimed allocations */
456 	uchar_t *map2,	/* map of determined allocations */
457 	int mapsize,	/* size of above two maps */
458 	int startvalue,	/* resource value for first element in map */
459 	char *name,	/* name of resource found in maps */
460 	int skip,	/* number of entries to skip before starting to free */
461 	int limit)	/* limit on number of entries to free */
462 {
463 	long i, j, k, l, m, n, size;
464 	int astart, aend, ustart, uend;
465 	int mismatch;
466 
467 	mismatch = 0;
468 	astart = ustart = aend = uend = -1;
469 	for (i = 0; i < mapsize; i++) {
470 		j = *map1++;
471 		k = *map2++;
472 		if (j == k)
473 			continue;
474 		for (m = 0, l = 1; m < NBBY; m++, l <<= 1) {
475 			if ((j & l) == (k & l))
476 				continue;
477 			n = startvalue + i * NBBY + m;
478 			if ((j & l) != 0) {
479 				if (astart == -1) {
480 					astart = aend = n;
481 					continue;
482 				}
483 				if (aend + 1 == n) {
484 					aend = n;
485 					continue;
486 				}
487 				if (verbose) {
488 					if (astart == aend)
489 						pwarn(
490 			    "ALLOCATED %s %d WAS MARKED FREE ON DISK\n",
491 						    name, astart);
492 					else
493 						pwarn(
494 			    "ALLOCATED %sS %d-%d WERE MARKED FREE ON DISK\n",
495 						    name, astart, aend);
496 				}
497 				mismatch = 1;
498 				astart = aend = n;
499 			} else {
500 				if (ustart == -1) {
501 					ustart = uend = n;
502 					continue;
503 				}
504 				if (uend + 1 == n) {
505 					uend = n;
506 					continue;
507 				}
508 				size = uend - ustart + 1;
509 				if (size <= skip) {
510 					skip -= size;
511 					ustart = uend = n;
512 					continue;
513 				}
514 				if (skip > 0) {
515 					ustart += skip;
516 					size -= skip;
517 					skip = 0;
518 				}
519 				if (size > limit)
520 					size = limit;
521 				if (verbose) {
522 					if (size == 1)
523 						pwarn(
524 			    "UNALLOCATED %s %d WAS MARKED USED ON DISK\n",
525 						    name, ustart);
526 					else
527 						pwarn(
528 			    "UNALLOCATED %sS %d-%ld WERE MARKED USED ON DISK\n",
529 						    name, ustart,
530 						    ustart + size - 1);
531 				}
532 				mismatch = 1;
533 				limit -= size;
534 				if (limit <= 0)
535 					return (mismatch);
536 				ustart = uend = n;
537 			}
538 		}
539 	}
540 	if (astart != -1) {
541 		if (verbose) {
542 			if (astart == aend)
543 				pwarn(
544 			    "ALLOCATED %s %d WAS MARKED FREE ON DISK\n",
545 				    name, astart);
546 			else
547 				pwarn(
548 			    "ALLOCATED %sS %d-%d WERE MARKED FREE ON DISK\n",
549 				    name, astart, aend);
550 		}
551 		mismatch = 1;
552 	}
553 	if (ustart != -1) {
554 		size = uend - ustart + 1;
555 		if (size <= skip)
556 			return (mismatch);
557 		if (skip > 0) {
558 			ustart += skip;
559 			size -= skip;
560 		}
561 		if (size > limit)
562 			size = limit;
563 		if (verbose) {
564 			if (size == 1)
565 				pwarn(
566 			    "UNALLOCATED %s %d WAS MARKED USED ON DISK\n",
567 				    name, ustart);
568 			else
569 				pwarn(
570 		    "UNALLOCATED %sS %d-%ld WERE MARKED USED ON DISK\n",
571 				    name, ustart, ustart + size - 1);
572 		}
573 		mismatch = 1;
574 	}
575 	return (mismatch);
576 }
577