1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28 /*
29 * Keep track of duplicate fragment references (elsewhere called
30 * blocks for ancient historical reasons).
31 *
32 * The duplicates are kept in a binary tree to attempt to minimize
33 * search times when checking the block lists of all active inodes
34 * for multiple uses. This is opposed to using a simple linear list
35 * that is traversed for every block, as is used in the traditional
36 * fsck. It can be very time-expensive if there's more than just a
37 * very few duplicates, and typically there are either none or lots.
38 *
39 * For each multiply-claimed fragment, we note all of the claiming
40 * inodes and their corresponding logical block numbers. This allows
41 * reporting exactly which parts of which files were damaged, which
42 * provides at least a chance of recovering the bulk of the data on
43 * a seriously-corrupted filesystem.
44 */
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <unistd.h>
48 #include <sys/avl.h>
49 #define _KERNEL
50 #include <sys/fs/ufs_fsdir.h> /* for struct direct */
51 #undef _KERNEL
52 #include <sys/debug.h>
53 #include "fsck.h"
54
55 #define OFFSETOF(type, elt) ((size_t)(&((type *)NULL)->elt))
56
57 /*
58 * For each physical fragment with multiple claimants, the specifics
59 * of each claim are recorded. This means there are N+1 AVL trees in
60 * use: one for each fragment's claimant table, plus one that orders
61 * the fragments themselves.
62 *
63 * The table of fragments simply has the physical fragment number
64 * (pfn) and has the root of the tree of the associated claimants. It
65 * is keyed by the pfn and called dup_frags.
66 *
67 * The subsidiary trees list inodes and logical fragment number (lfn)
68 * for each claimant. They are keyed first by inode number and then
69 * by lfn. Both are needed, as it is possible for one inode to have
70 * multiple claims on the same fragment.
71 */
72
73 typedef struct claimant {
74 fsck_ino_t cl_inode;
75 daddr32_t cl_lfn;
76 avl_node_t cl_avl;
77 } claimant_t;
78
79 typedef struct fragment {
80 daddr32_t fr_pfn;
81 avl_tree_t fr_claimants;
82 avl_node_t fr_avl;
83 } fragment_t;
84
85 typedef struct reference {
86 daddr32_t ref_lfn;
87 daddr32_t ref_pfn;
88 avl_node_t ref_avl;
89 } reference_t;
90
91 typedef struct inode_dup {
92 fsck_ino_t id_ino;
93 avl_tree_t id_fragments;
94 avl_node_t id_avl;
95 } inode_dup_t;
96
97 static avl_tree_t dup_frags;
98
99 static void free_invert_frags(avl_tree_t *);
100 static void report_dup_lfn_pfn(daddr32_t, daddr32_t, daddr32_t, daddr32_t);
101 static inode_dup_t *new_inode_dup(fsck_ino_t);
102 static void invert_frags(avl_tree_t *, avl_tree_t *);
103 static void report_inode_dups(inode_dup_t *);
104 static int by_ino_cmp(const void *, const void *);
105 static int by_lfn_cmp(const void *, const void *);
106 static claimant_t *alloc_claimant(fsck_ino_t, daddr32_t);
107 static fragment_t *alloc_dup(daddr32_t);
108 static int claimant_cmp(const void *, const void *);
109 static int fragment_cmp(const void *, const void *);
110 static int decrement_claimant(fragment_t *, fsck_ino_t, daddr32_t);
111 static int increment_claimant(fragment_t *, fsck_ino_t, daddr32_t);
112
113 /*
114 * Simple accessor function for the outside world so only we need to
115 * see and interpret our data structures.
116 */
117 int
have_dups(void)118 have_dups(void)
119 {
120 return (avl_numnodes(&dup_frags) > 0);
121 }
122
123 /*
124 * Locates, creates, and deletes a record of a duplicate reference.
125 *
126 * For DB_INCR, returns true if the dup was added to the tree.
127 * For DB_DECR, returns true if the dup was in the tree.
128 */
129 int
find_dup_ref(daddr32_t fragno,fsck_ino_t ino,daddr32_t lfn,int flags)130 find_dup_ref(daddr32_t fragno, fsck_ino_t ino, daddr32_t lfn, int flags)
131 {
132 fragment_t key;
133 fragment_t *dup;
134 avl_index_t where;
135 int added = 0;
136 int removed = 0;
137
138 if (avl_first(&dup_frags) == NULL) {
139 if (flags & DB_CREATE)
140 avl_create(&dup_frags, fragment_cmp,
141 sizeof (fragment_t),
142 OFFSETOF(fragment_t, fr_avl));
143 else
144 return (0);
145 }
146
147 key.fr_pfn = fragno;
148 dup = avl_find(&dup_frags, (void *)&key, &where);
149 if ((dup == NULL) & (flags & DB_CREATE)) {
150 dup = alloc_dup(fragno);
151 avl_insert(&dup_frags, (void *)dup, where);
152 }
153
154 if (dup != NULL) {
155 if (flags & DB_INCR) {
156 if (debug)
157 (void) printf(
158 "adding claim by ino %d as lfn %d\n",
159 ino, lfn);
160 added = increment_claimant(dup, ino, lfn);
161 } else if (flags & DB_DECR) {
162 /*
163 * Note that dup may be invalidated by this call.
164 */
165 removed = decrement_claimant(dup, ino, lfn);
166 if (debug)
167 (void) printf(
168 "check for claimant ino %d lfn %d returned %d\n",
169 ino, lfn, removed);
170 }
171 }
172
173 return (added || removed || (dup != NULL));
174 }
175
176 /*
177 * Dump the duplicates table in a relatively user-friendly form.
178 * The idea is that the output can be useful when trying to manually
179 * work out which block belongs to which of the claiming inodes.
180 *
181 * What we have is a tree of duplicates indexed by physical
182 * fragment number. What we want to report is:
183 *
184 * Inode %d:
185 * Logical Offset 0x%08llx, Physical Fragment %d
186 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
187 * ...
188 * Inode %d:
189 * Logical Offsets 0x%08llx - 0x%08llx, Physical Fragments %d - %d
190 * ...
191 */
192 int
report_dups(int quiet)193 report_dups(int quiet)
194 {
195 int overlaps;
196 inode_dup_t *inode;
197 fragment_t *dup;
198 avl_tree_t inode_frags;
199
200 overlaps = 0;
201 ASSERT(have_dups());
202 /*
203 * Figure out how many actual dups are still around.
204 * This tells us whether or not we can mark the
205 * filesystem clean.
206 */
207 dup = avl_first(&dup_frags);
208 while (dup != NULL) {
209 if (avl_numnodes(&dup->fr_claimants) > 1) {
210 overlaps++;
211 break;
212 }
213 dup = AVL_NEXT(&dup_frags, dup);
214 }
215
216 /*
217 * Now report on every object that still exists that
218 * had *any* dups associated with it.
219 */
220 if (!quiet) {
221 (void) puts("\nSome blocks that were found to be in "
222 "multiple files are still\nassigned to "
223 "file(s).\nFragments sorted by inode and "
224 "logical offsets:");
225
226 invert_frags(&dup_frags, &inode_frags);
227 inode = avl_first(&inode_frags);
228 while (inode != NULL) {
229 report_inode_dups(inode);
230 inode = AVL_NEXT(&inode_frags, inode);
231 }
232 (void) printf("\n");
233
234 free_invert_frags(&inode_frags);
235 }
236
237 return (overlaps);
238 }
239
240 static void
report_inode_dups(inode_dup_t * inode)241 report_inode_dups(inode_dup_t *inode)
242 {
243 reference_t *dup;
244 daddr32_t first_lfn, last_lfn, first_pfn, last_pfn;
245
246 (void) printf("Inode %d:\n", inode->id_ino);
247 dup = avl_first(&inode->id_fragments);
248 first_lfn = last_lfn = dup->ref_lfn;
249 first_pfn = last_pfn = dup->ref_pfn;
250 while ((dup = AVL_NEXT(&inode->id_fragments, dup)) != NULL) {
251 if (((last_lfn + 1) != dup->ref_lfn) ||
252 ((last_pfn + 1) != dup->ref_pfn)) {
253 report_dup_lfn_pfn(first_lfn, last_lfn,
254 first_pfn, last_pfn);
255 first_lfn = last_lfn = dup->ref_lfn;
256 first_pfn = last_pfn = dup->ref_pfn;
257 }
258 }
259 report_dup_lfn_pfn(first_lfn, last_lfn, first_pfn, last_pfn);
260 }
261
262 static void
report_dup_lfn_pfn(daddr32_t first_lfn,daddr32_t last_lfn,daddr32_t first_pfn,daddr32_t last_pfn)263 report_dup_lfn_pfn(daddr32_t first_lfn, daddr32_t last_lfn,
264 daddr32_t first_pfn, daddr32_t last_pfn)
265 {
266 if ((first_lfn == last_lfn) && (first_pfn == last_pfn)) {
267 (void) printf(
268 " Logical Offset 0x%08llx Physical Fragment %d\n",
269 (longlong_t)first_lfn * sblock.fs_fsize, first_pfn);
270 } else {
271 (void) printf(
272 " Logical Offsets 0x%08llx - 0x%08llx, "
273 "Physical Fragments %d - %d\n",
274 (longlong_t)first_lfn * sblock.fs_fsize,
275 (longlong_t)last_lfn * sblock.fs_fsize,
276 first_pfn, last_pfn);
277 }
278 }
279
280 /*
281 * Given a tree of fragment_ts, each element of which has an integral
282 * sub-tree of claimant_ts, produce a tree of inode_dup_ts, each element
283 * of which has an integral sub-tree of reference_ts.
284 */
285 static void
invert_frags(avl_tree_t * source,avl_tree_t * target)286 invert_frags(avl_tree_t *source, avl_tree_t *target)
287 {
288 fragment_t *src_frag;
289 claimant_t *src_claim;
290 inode_dup_t *tgt_inode;
291 inode_dup_t tgt_inode_key;
292 reference_t *tgt_ref;
293 reference_t tgt_ref_key;
294 avl_index_t where;
295
296 avl_create(target, by_ino_cmp, sizeof (inode_dup_t),
297 OFFSETOF(inode_dup_t, id_avl));
298
299 src_frag = avl_first(source);
300 while (src_frag != NULL) {
301 src_claim = avl_first(&src_frag->fr_claimants);
302 while (src_claim != NULL) {
303 /*
304 * Have we seen this inode before?
305 */
306 tgt_inode_key.id_ino = src_claim->cl_inode;
307 tgt_inode = avl_find(target, (void *)&tgt_inode_key,
308 &where);
309 if (tgt_inode == NULL) {
310 /*
311 * No, so set up a record for it.
312 */
313 tgt_inode = new_inode_dup(src_claim->cl_inode);
314 avl_insert(target, (void *)tgt_inode, where);
315 }
316 /*
317 * Now, how about this logical fragment? In
318 * theory, we should never see a duplicate, since
319 * a given lfn only exists once for a given inode.
320 * As such, we ignore duplicate hits.
321 */
322 tgt_ref_key.ref_lfn = src_claim->cl_lfn;
323 tgt_ref = avl_find(&tgt_inode->id_fragments,
324 (void *)&tgt_ref_key, &where);
325 if (tgt_ref == NULL) {
326 /*
327 * Haven't seen it, add it.
328 */
329 tgt_ref = (reference_t *)malloc(
330 sizeof (reference_t));
331 if (tgt_ref == NULL)
332 errexit("Out of memory in "
333 "invert_frags\n");
334 tgt_ref->ref_lfn = src_claim->cl_lfn;
335 tgt_ref->ref_pfn = src_frag->fr_pfn;
336 avl_insert(&tgt_inode->id_fragments,
337 (void *)tgt_ref, where);
338 }
339 src_claim = AVL_NEXT(&src_frag->fr_claimants,
340 src_claim);
341 }
342 src_frag = AVL_NEXT(source, src_frag);
343 }
344 }
345
346 /*
347 * Discard memory associated with the inverted fragments tree created
348 * by report_dups() via invert_frags().
349 */
350 static void
free_invert_frags(avl_tree_t * tree)351 free_invert_frags(avl_tree_t *tree)
352 {
353 void *outer = NULL; /* traversal cookie */
354 void *inner; /* traversal cookie */
355 inode_dup_t *inode_dup;
356 reference_t *ref_dup;
357
358 while ((inode_dup = avl_destroy_nodes(tree, &outer)) != NULL) {
359 inner = NULL;
360 while ((ref_dup = avl_destroy_nodes(&inode_dup->id_fragments,
361 &inner)) != NULL) {
362 free((void *)ref_dup);
363 }
364 avl_destroy(&inode_dup->id_fragments);
365 free((void *)inode_dup);
366 }
367 avl_destroy(tree);
368 }
369
370 /*
371 * Discard all memory allocations associated with the current duplicates
372 * table.
373 */
374 void
free_dup_state(void)375 free_dup_state(void)
376 {
377 void *dup_cookie = NULL;
378 void *claim_cookie;
379 fragment_t *fragv;
380 claimant_t *claimv;
381
382 while ((fragv = avl_destroy_nodes(&dup_frags, &dup_cookie)) != NULL) {
383 claim_cookie = NULL;
384 while ((claimv = avl_destroy_nodes(&fragv->fr_claimants,
385 &claim_cookie)) != NULL) {
386 free((void *)claimv);
387 }
388 avl_destroy(&fragv->fr_claimants);
389 free((void *)fragv);
390 }
391 avl_destroy(&dup_frags);
392 }
393
394 /*
395 * If the given claimant has not been seen before, add it to DUP's
396 * list of them. It's not fatal for the same PFN/INODE/LFN to get
397 * added twice, because pass1b() will add the same dups that pass1()
398 * did, plus one.
399 */
400 static int
increment_claimant(fragment_t * dup,fsck_ino_t ino,daddr32_t lfn)401 increment_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
402 {
403 avl_index_t where;
404 claimant_t *claimant;
405 claimant_t key;
406 int added = 0;
407
408 key.cl_inode = ino;
409 key.cl_lfn = lfn;
410 claimant = avl_find(&dup->fr_claimants, &key, &where);
411 if (claimant == NULL) {
412 if (debug)
413 (void) printf("inserting claimant\n");
414 claimant = alloc_claimant(ino, lfn);
415 avl_insert(&dup->fr_claimants, (void *)claimant, where);
416 statemap[ino] |= INCLEAR;
417 /*
418 * If the inode is to be cleared and has zero links then remove
419 * the zero link bit as it will be cleared anyway. If INZLINK
420 * is being removed and it's a directory inode then add the
421 * inode to the orphan directory list.
422 */
423 if (statemap[ino] & INZLINK) {
424 statemap[ino] &= ~INZLINK;
425 if (statemap[ino] & DSTATE) {
426 add_orphan_dir(ino);
427 }
428 }
429 added = 1;
430 }
431
432 return (added);
433 }
434
435 /*
436 * If the given claimant is on DUP's list, remove it. It is not
437 * an error for the claimant to not be on the list.
438 */
439 static int
decrement_claimant(fragment_t * dup,fsck_ino_t ino,daddr32_t lfn)440 decrement_claimant(fragment_t *dup, fsck_ino_t ino, daddr32_t lfn)
441 {
442 avl_index_t where;
443 claimant_t *claimant;
444 claimant_t key;
445 int busy = 0;
446
447 key.cl_inode = ino;
448 key.cl_lfn = lfn;
449 claimant = avl_find(&dup->fr_claimants, &key, &where);
450 if (claimant != NULL) {
451 avl_remove(&dup->fr_claimants, claimant);
452 if (avl_numnodes(&dup->fr_claimants) == 0) {
453 avl_destroy(&dup->fr_claimants);
454 avl_remove(&dup_frags, (void *)dup);
455 free((void *)dup);
456 } else {
457 busy = 1;
458 }
459 }
460
461 return (busy);
462 }
463
464 static claimant_t *
alloc_claimant(fsck_ino_t inode,daddr32_t lfn)465 alloc_claimant(fsck_ino_t inode, daddr32_t lfn)
466 {
467 claimant_t *new = (claimant_t *)malloc(sizeof (claimant_t));
468
469 if (new == NULL)
470 errexit("Out of memory in alloc_claimant()\n");
471
472 new->cl_inode = inode;
473 new->cl_lfn = lfn;
474
475 return (new);
476 }
477
478 static fragment_t *
alloc_dup(daddr32_t pfn)479 alloc_dup(daddr32_t pfn)
480 {
481 fragment_t *new = (fragment_t *)malloc(sizeof (fragment_t));
482
483 if (new == NULL)
484 errexit("Out of memory in alloc_dup()\n");
485
486 new->fr_pfn = pfn;
487 avl_create(&new->fr_claimants, claimant_cmp, sizeof (fragment_t),
488 OFFSETOF(claimant_t, cl_avl));
489
490 return (new);
491 }
492
493 /*
494 * Compare two fragment_t instances for avl_find(). It requires a
495 * return value of -1/0/1, so we can't just hand back left - right.
496 */
497 static int
fragment_cmp(const void * vlp,const void * vrp)498 fragment_cmp(const void *vlp, const void *vrp)
499 {
500 const fragment_t *lp = (const fragment_t *)vlp;
501 const fragment_t *rp = (const fragment_t *)vrp;
502 int cmp = lp->fr_pfn - rp->fr_pfn;
503
504 if (cmp < 0)
505 cmp = -1;
506 else if (cmp > 0)
507 cmp = 1;
508
509 return (cmp);
510 }
511
512 /*
513 * Compare two claimant_t instances for avl_find(). It requires a
514 * return value of -1/0/1, so we can't just hand back left - right.
515 */
516 static int
claimant_cmp(const void * vlp,const void * vrp)517 claimant_cmp(const void *vlp, const void *vrp)
518 {
519 const claimant_t *lp = (const claimant_t *)vlp;
520 const claimant_t *rp = (const claimant_t *)vrp;
521 int cmp;
522
523 cmp = lp->cl_inode - rp->cl_inode;
524 if (cmp == 0) {
525 /*
526 * lfn < 0 is a wildcard lfn match.
527 */
528 if ((lp->cl_lfn >= 0) && (rp->cl_lfn >= 0))
529 cmp = lp->cl_lfn - rp->cl_lfn;
530 }
531
532 if (cmp < 0)
533 cmp = -1;
534 else if (cmp > 0)
535 cmp = 1;
536
537 return (cmp);
538 }
539
540 static int
by_ino_cmp(const void * vlp,const void * vrp)541 by_ino_cmp(const void *vlp, const void *vrp)
542 {
543 const inode_dup_t *lp = (const inode_dup_t *)vlp;
544 const inode_dup_t *rp = (const inode_dup_t *)vrp;
545 int cmp;
546
547 cmp = lp->id_ino - rp->id_ino;
548
549 if (cmp < 0)
550 cmp = -1;
551 else if (cmp > 0)
552 cmp = 1;
553
554 return (cmp);
555 }
556
557 static int
by_lfn_cmp(const void * vlp,const void * vrp)558 by_lfn_cmp(const void *vlp, const void *vrp)
559 {
560 const reference_t *lp = (const reference_t *)vlp;
561 const reference_t *rp = (const reference_t *)vrp;
562 int cmp;
563
564 cmp = lp->ref_lfn - rp->ref_lfn;
565
566 if (cmp < 0)
567 cmp = -1;
568 else if (cmp > 0)
569 cmp = 1;
570
571 return (cmp);
572 }
573
574 static inode_dup_t *
new_inode_dup(fsck_ino_t inode)575 new_inode_dup(fsck_ino_t inode)
576 {
577 inode_dup_t *new;
578
579 new = (inode_dup_t *)malloc(sizeof (inode_dup_t));
580 if (new == NULL)
581 errexit("Out of memory in new_inode_dup\n");
582 new->id_ino = inode;
583 avl_create(&new->id_fragments, by_lfn_cmp, sizeof (reference_t),
584 OFFSETOF(reference_t, ref_avl));
585
586 return (new);
587 }
588